Changeset 5486


Ignore:
Timestamp:
May 31, 2017, 4:25:33 PM (21 months ago)
Author:
nmedfort
Message:

Initial attempt to improve debugging capabilities with compilation stack traces on error.

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
37 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5476 r5486  
    88set (icgrep_VERSION_MINOR 6)
    99
    10 
    11 
    1210option(ENABLE_MULTIPLEXING "Compiling the Multiplexing Module")
    13 option(PRINT_TIMING_INFORMATION "Write compilation and execution timing information to standard error stream")
    1411option(DISABLE_DUAL_ABI "Disable GCC Dual ABI support" OFF)
    1512find_package(LLVM REQUIRED CONFIG)
     
    2926link_directories( ${LLVM_LIBRARY_DIRS} )
    3027add_definitions( ${LLVM_DEFINITIONS} )
     28
     29# include(FindCUDA) # https://cmake.org/cmake/help/v3.0/module/FindCUDA.html
    3130
    3231option(ENABLE_CUDA_COMPILE "Compiling with CUDA")
     
    104103add_executable(array-test array-test.cpp kernels/alignedprint.cpp)
    105104add_executable(lz4d lz4d.cpp lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4_index_decoder.cpp kernels/lz4_bytestream_decoder.cpp)
    106 
    107 ## IWYU detects superfluous includes and when the include can be replaced with a forward declaration.
    108 ## It can be obtained using "apt-get install iwyu" or from "github.com/include-what-you-use".
    109 
    110 #find_program(IWYU_PATH NAMES include-what-you-use iwyu)
    111 #if(IWYU_PATH)
    112 #cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
    113 #execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-libgcc-file-name OUTPUT_VARIABLE LIBGCC_FILE)
    114 #get_filename_component(LIBGCC_PATH ${LIBGCC_FILE} DIRECTORY)
    115 #include_directories("${LIBGCC_PATH}/include")
    116 #set_property(TARGET CodeGen PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    117 #set_property(TARGET PabloADT PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    118 #set_property(TARGET RegExpADT PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    119 #set_property(TARGET RegExpCompiler PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    120 #set_property(TARGET CCADT PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    121 #set_property(TARGET UCDlib PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    122 #set_property(TARGET icgrep PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    123 #set_property(TARGET u8u16 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    124 #set_property(TARGET base64 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    125 #set_property(TARGET wc PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    126 #set_property(TARGET editd PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    127 #set_property(TARGET array-test PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    128 #endif()
    129 
    130 IF (PRINT_TIMING_INFORMATION)
    131     find_package(PAPI REQUIRED)
    132     include_directories(${PAPI_INCLUDE_DIRS})
    133     target_link_libraries(icgrep ${PAPI_LIBRARIES})
    134 ENDIF()
    135105
    136106target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     
    200170ENDIF()
    201171
     172include(CheckIncludeFileCXX)
     173CHECK_INCLUDE_FILE_CXX(sanitizer/asan_interface.h HAS_ADDRESS_SANITIZER)
     174
     175find_package(Libunwind)
     176IF (LIBUNWIND_FOUND)
     177SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_LIBUNWIND")
     178include_directories(${LIBUNWIND_INCLUDE_DIR})
     179target_link_libraries(CodeGen ${LIBUNWIND_LIBRARIES})
     180ENDIF()
    202181
    203182SET(CMAKE_REQUIRED_FLAGS)
     
    216195SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPARABIX_VERSION='\"${Parabix_REVISION}\"'")
    217196
    218 IF (PRINT_TIMING_INFORMATION)
    219     SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPRINT_TIMING_INFORMATION")
    220 ENDIF()
    221 
    222 SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG")
    223 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=address")
     197SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} -O3 -DNDEBUG")
     198
     199SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} -O1 -g -fno-omit-frame-pointer -fno-optimize-sibling-calls")
     200IF (HAS_ADDRESS_SANITIZER)
     201SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAS_ADDRESS_SANITIZER -fsanitize=address")
     202ENDIF()
     203
    224204
    225205add_test(
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5474 r5486  
    1212#include <llvm/IR/MDBuilder.h>
    1313#include <llvm/Support/raw_ostream.h>
     14#include <llvm/Support/Format.h>
    1415#include <toolchain/toolchain.h>
    1516#include <toolchain/driver.h>
     17#include <set>
     18#include <thread>
    1619#include <stdlib.h>
    1720#include <sys/mman.h>
    1821#include <unistd.h>
     22#include <stdio.h>
     23#ifdef HAS_ADDRESS_SANITIZER
     24#include <sanitizer/asan_interface.h>
     25#endif
     26#ifdef HAVE_LIBUNWIND
     27#define UNW_LOCAL_ONLY
     28#include <libunwind.h>
     29#else
     30using unw_word_t = uint64_t;
     31#endif
    1932
    2033using namespace llvm;
     34
     35void __report_failure(const char * msg, const unw_word_t * trace) {
     36    raw_fd_ostream out(STDERR_FILENO, false);
     37    #ifdef HAVE_LIBUNWIND
     38    if (trace) {
     39        out.changeColor(raw_fd_ostream::WHITE, true);
     40        out << "Compilation Stacktrace:\n";
     41        out.resetColor();
     42        while (*trace) {
     43            const auto pc = *trace++;
     44            out << format_hex(pc, 16) << "   ";
     45            const auto len = codegen::ProgramName.length() + 32;
     46            char cmd[len];
     47            snprintf(cmd, len,"addr2line -fpCe %s %p", codegen::ProgramName.data(), reinterpret_cast<void *>(pc));
     48            FILE * f = popen(cmd, "r");
     49            if (f) {
     50                char buffer[1024] = {0};
     51                while(fgets(buffer, sizeof(buffer), f)) {
     52                    out << buffer;
     53                }
     54                pclose(f);
     55            }
     56            ++trace;
     57        }
     58    }
     59    #endif
     60    out.changeColor(raw_fd_ostream::WHITE, true);
     61    out << "Assertion `" << msg << "' failed.\n";
     62    out.resetColor();
     63    out.flush();
     64}
    2165
    2266Value * CBuilder::CreateOpenCall(Value * filename, Value * oflag, Value * mode) {
     
    74118}
    75119
    76 
    77120Value * CBuilder::CreateUnlinkCall(Value * path) {
    78121    Module * const m = getModule();
     
    95138}
    96139
    97 
    98140Value * CBuilder::CreateStrlenCall(Value * str) {
    99141    Module * const m = getModule();
     
    104146    return CreateCall(strlenFn, str);
    105147}
    106 
    107148
    108149Function * CBuilder::GetPrintf() {
     
    224265    Function * f = m->getFunction("malloc");
    225266    if (f == nullptr) {
    226         // f = LinkFunction("malloc", &malloc);
    227267        PointerType * const voidPtrTy = getVoidPtrTy();
    228268        FunctionType * fty = FunctionType::get(voidPtrTy, {sizeTy}, false);
     
    269309    }
    270310    #ifdef STDLIB_HAS_ALIGNED_ALLOC
    271     Value * const ptr = CreateCall(f, {align, size});
     311    CallInst * const ptr = CreateCall(f, {align, size});
     312    ptr->setTailCall();
    272313    #else
    273314    Value * ptr = CreateAlloca(voidPtrTy);
    274     Value * success = CreateCall(f, {ptr, align, size});
     315    CallInst * success = CreateCall(f, {ptr, align, size});
     316    success->setTailCall();
    275317    if (codegen::EnableAsserts) {
    276         CreateAssert(CreateICmpEQ(success, getInt32(0)), "CreateAlignedMalloc: posix_memalign reported bad allocation");
     318        CreateAssert(CreateICmpEQ(success, getInt32(0)),
     319                     "CreateAlignedMalloc: posix_memalign reported bad allocation");
    277320    }
    278321    ptr = CreateLoad(ptr);
     
    281324    return ptr;
    282325}
    283 
    284326
    285327Value * CBuilder::CreateRealloc(Value * const ptr, Value * size) {
     
    294336        f->setDoesNotAlias(0);
    295337        f->setDoesNotAlias(1);
    296         // f = LinkFunction("realloc", &realloc);
    297338    }
    298339    Value * const addr = CreatePointerCast(ptr, voidPtrTy);
     
    312353        f = Function::Create(fty, Function::ExternalLinkage, "free", m);
    313354        f->setCallingConv(CallingConv::C);
    314         // f = LinkFunction("free", &std::free);
    315355    }
    316356    ptr = CreatePointerCast(ptr, voidPtrTy);
     
    354394    Value * ptr = CreateCall(fMMap, {addr, size, prot, flags, fd, offset});
    355395    if (codegen::EnableAsserts) {
    356         CreateAssert(CheckMMapSuccess(ptr), "CreateMMap: mmap failed to allocate memory");
     396        DataLayout DL(m);
     397        IntegerType * const intTy = getIntPtrTy(DL);
     398        Value * success = CreateICmpNE(CreatePtrToInt(addr, intTy), ConstantInt::getAllOnesValue(intTy)); // MAP_FAILED = -1
     399        CreateAssert(success, "CreateMMap: mmap failed to allocate memory");
    357400    }
    358401    return ptr;
     
    417460}
    418461
    419 Value * CBuilder::CheckMMapSuccess(Value * const addr) {
    420     Module * const m = getModule();
    421     DataLayout DL(m);
    422     IntegerType * const intTy = getIntPtrTy(DL);
    423     return CreateICmpNE(CreatePtrToInt(addr, intTy), ConstantInt::getAllOnesValue(intTy)); // MAP_FAILED = -1
    424 }
    425 
    426462#ifndef MREMAP_MAYMOVE
    427463#define MREMAP_MAYMOVE  1
     
    448484        ptr = CreateCall(fMRemap, {addr, oldSize, newSize, flags});
    449485        if (codegen::EnableAsserts) {
    450             CreateAssert(CheckMMapSuccess(ptr), "CreateMRemap: mremap failed to allocate memory");
     486            Value * success = CreateICmpNE(CreatePtrToInt(addr, intTy), ConstantInt::getAllOnesValue(intTy)); // MAP_FAILED = -1
     487            CreateAssert(success, "CreateMRemap: mremap failed to allocate memory");
    451488        }
    452489    } else { // no OS mremap support
     
    594631}
    595632
     633Value * CBuilder::CreatePThreadYield() {
     634    Module * const m = getModule();
     635    Function * f = m->getFunction("pthread_yield");
     636    if (f == nullptr) {
     637        FunctionType * fty = FunctionType::get(getInt32Ty(), false);
     638        f = Function::Create(fty, Function::ExternalLinkage, "pthread_yield", m);
     639        f->setCallingConv(CallingConv::C);
     640    }
     641    return CreateCall(f);
     642}
     643
    596644Value * CBuilder::CreatePThreadExitCall(Value * value_ptr) {
    597645    Module * const m = getModule();
     
    612660    Function * pthreadJoinFunc = m->getFunction("pthread_join");
    613661    if (pthreadJoinFunc == nullptr) {
    614         Type * pthreadTy = getSizeTy();
    615         FunctionType * fty = FunctionType::get(getInt32Ty(), {pthreadTy, getVoidPtrTy()->getPointerTo()}, false);
     662        FunctionType * fty = FunctionType::get(getInt32Ty(), {getSizeTy(), getVoidPtrTy()->getPointerTo()}, false);
    616663        pthreadJoinFunc = Function::Create(fty, Function::ExternalLinkage, "pthread_join", m);
    617664        pthreadJoinFunc->setCallingConv(CallingConv::C);
     
    620667}
    621668
    622 void CBuilder::CreateAssert(Value * const assertion, StringRef failureMessage) {   
    623     if (codegen::EnableAsserts) {
     669void CBuilder::CreateAssert(Value * assertion, StringRef failureMessage) {
     670    if (LLVM_UNLIKELY(codegen::EnableAsserts)) {
    624671        Module * const m = getModule();
    625         Function * function = m->getFunction("__assert");
     672        Function * function = m->getFunction("assert");
     673        IntegerType * const int1Ty = getInt1Ty();
    626674        if (LLVM_UNLIKELY(function == nullptr)) {
    627675            auto ip = saveIP();
    628             FunctionType * fty = FunctionType::get(getVoidTy(), { getInt1Ty(), getInt8PtrTy(), getSizeTy() }, false);
    629             function = Function::Create(fty, Function::PrivateLinkage, "__assert", m);
     676            PointerType * const int8PtrTy = getInt8PtrTy();
     677            IntegerType * const unwWordTy = TypeBuilder<unw_word_t, false>::get(getContext());
     678            PointerType * const unwWordPtrTy = unwWordTy->getPointerTo();
     679            FunctionType * fty = FunctionType::get(getVoidTy(), { int1Ty, int8PtrTy, unwWordPtrTy }, false);
     680            function = Function::Create(fty, Function::PrivateLinkage, "assert", m);
    630681            function->setDoesNotThrow();
    631682            function->setDoesNotAlias(2);
     
    635686            auto arg = function->arg_begin();
    636687            arg->setName("assertion");
    637             Value * e = &*arg++;
     688            Value * assertion = &*arg++;
    638689            arg->setName("msg");
    639690            Value * msg = &*arg++;
    640             arg->setName("sz");
    641             Value * sz = &*arg;
     691            arg->setName("trace");
     692            Value * trace = &*arg++;
    642693            SetInsertPoint(entry);
    643             CreateCondBr(e, failure, success);
    644             SetInsertPoint(failure);
    645             Value * len = CreateAdd(sz, getSize(21));
    646             ConstantInt * _11 = getSize(11);
    647             Value * bytes = CreatePointerCast(CreateMalloc(len), getInt8PtrTy());
    648             CreateMemCpy(bytes, GetString("Assertion `"), _11, 1);
    649             CreateMemCpy(CreateGEP(bytes, _11), msg, sz, 1);
    650             CreateMemCpy(CreateGEP(bytes, CreateAdd(sz, _11)), GetString("' failed.\n"), getSize(10), 1);
    651             CreateWriteCall(getInt32(2), bytes, len);
    652 
    653 
     694            IRBuilder<>::CreateCondBr(assertion, success, failure);
     695            IRBuilder<>::SetInsertPoint(failure);
     696            IRBuilder<>::CreateCall(LinkFunction("__report_failure", __report_failure), { msg, trace });
    654697            CreateExit(-1);
    655             CreateBr(success); // necessary to satisfy the LLVM verifier. this is not actually executed.
     698            IRBuilder<>::CreateBr(success); // necessary to satisfy the LLVM verifier. this is never executed.
    656699            SetInsertPoint(success);
    657             CreateRetVoid();
     700            IRBuilder<>::CreateRetVoid();
    658701            restoreIP(ip);
    659702        }
    660         CreateCall(function, {CreateICmpEQ(assertion, Constant::getNullValue(assertion->getType())), GetString(failureMessage), getSize(failureMessage.size())});
     703        if (assertion->getType() != int1Ty) {
     704            assertion = CreateICmpNE(assertion, Constant::getNullValue(assertion->getType()));
     705        }
     706        IntegerType * const unwWordTy = TypeBuilder<unw_word_t, false>::get(getContext());
     707        PointerType * const unwWordPtrTy = unwWordTy->getPointerTo();
     708        #ifdef HAVE_LIBUNWIND
     709        std::vector<unw_word_t> stack;
     710        unw_context_t context;
     711        unw_cursor_t cursor;
     712        // Initialize cursor to current frame for local unwinding.
     713        unw_getcontext(&context);
     714        unw_init_local(&cursor, &context);
     715        // Unwind frames one by one, going up the frame stack.
     716        stack.reserve(64);
     717        while (unw_step(&cursor) > 0) {
     718            unw_word_t pc;
     719            unw_get_reg(&cursor, UNW_REG_IP, &pc);
     720            stack.push_back(pc);
     721            if (pc == 0) {
     722                break;
     723            }
     724        }
     725        GlobalVariable * global = nullptr;
     726        const auto n = stack.size();
     727        IntegerType * const unwTy = TypeBuilder<unw_word_t, false>::get(getContext());
     728        for (GlobalVariable & gv : m->getGlobalList()) {
     729            Type * const ty = gv.getValueType();
     730            if (ty->isArrayTy() && ty->getArrayElementType() == unwTy && ty->getArrayNumElements() == n) {
     731                const ConstantDataArray * const array = cast<ConstantDataArray>(gv.getOperand(0));
     732                bool found = true;
     733                for (auto i = n - 1; i != 0; --i) {
     734                    if (LLVM_LIKELY(array->getElementAsInteger(i) != stack[i])) {
     735                        found = false;
     736                        break;
     737                    }
     738                }
     739                if (LLVM_UNLIKELY(found)) {
     740                    global = &gv;
     741                    break;
     742                }
     743            }
     744        }
     745        if (LLVM_LIKELY(global == nullptr)) {
     746            Constant * const initializer = ConstantDataArray::get(getContext(), stack);
     747            global = new GlobalVariable(*m, initializer->getType(), true, GlobalVariable::InternalLinkage, initializer);
     748        }
     749        Value * const trace = CreatePointerCast(global, unwWordPtrTy);
     750        #else
     751        Constant * const trace = ConstantPointerNull::get(unwWordPtrTy);
     752        #endif
     753        IRBuilder<>::CreateCall(function, {assertion, GetString(failureMessage), trace});
    661754    }
    662755}
     
    674767}
    675768
    676 llvm::BasicBlock * CBuilder::CreateBasicBlock(std::string && name) {
     769BasicBlock * CBuilder::CreateBasicBlock(std::string && name) {
    677770    return BasicBlock::Create(getContext(), name, GetInsertBlock()->getParent());
    678771}
     
    717810}
    718811
    719 Value * CBuilder::CreateExtractBitField(llvm::Value * bits, Value * start, Value * length) {
     812Value * CBuilder::CreateExtractBitField(Value * bits, Value * start, Value * length) {
    720813    Constant * One = ConstantInt::get(bits->getType(), 1);
    721814    return CreateAnd(CreateLShr(bits, start), CreateSub(CreateShl(One, length), One));
     
    745838}
    746839
    747 Function * CBuilder::LinkFunction(llvm::StringRef name, FunctionType * type, void * functionPtr) const {
     840Function * CBuilder::LinkFunction(StringRef name, FunctionType * type, void * functionPtr) const {
    748841    assert (mDriver);
    749842    return mDriver->addLinkFunction(getModule(), name, type, functionPtr);
    750843}
    751844
    752 CBuilder::CBuilder(llvm::LLVMContext & C, const unsigned GeneralRegisterWidthInBits)
     845#ifdef HAS_ADDRESS_SANITIZER
     846
     847#define CHECK_ADDRESS(Ptr) \
     848    if (codegen::EnableAsserts) { \
     849        Module * const m = getModule(); \
     850        PointerType * voidPtrTy = getVoidPtrTy(); \
     851        IntegerType * sizeTy = getSizeTy(); \
     852        Function * isPoisoned = m->getFunction("__asan_region_is_poisoned"); \
     853        if (isPoisoned == nullptr) { \
     854            auto isPoisonedTy = FunctionType::get(voidPtrTy, {voidPtrTy, sizeTy}, false); \
     855            isPoisoned = Function::Create(isPoisonedTy, Function::ExternalLinkage, "__asan_region_is_poisoned", m); \
     856        } \
     857        Value * const addr = CreatePointerCast(Ptr, voidPtrTy); \
     858        Constant * const size = ConstantInt::get(sizeTy, Ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits()); \
     859        Value * check = CreateCall(isPoisoned, { addr, size }); \
     860        check = CreateICmpEQ(check, ConstantPointerNull::get(voidPtrTy)); \
     861        CreateAssert(check, "Valid memory address"); \
     862    }
     863
     864LoadInst * CBuilder::CreateLoad(Value *Ptr, const char * Name) {
     865    CHECK_ADDRESS(Ptr);
     866    return IRBuilder<>::CreateLoad(Ptr, Name);
     867}
     868
     869LoadInst * CBuilder::CreateLoad(Value * Ptr, const Twine & Name) {
     870    CHECK_ADDRESS(Ptr);
     871    return IRBuilder<>::CreateLoad(Ptr, Name);
     872}
     873
     874LoadInst * CBuilder::CreateLoad(Type *Ty, Value *Ptr, const Twine & Name) {
     875    CHECK_ADDRESS(Ptr);
     876    return IRBuilder<>::CreateLoad(Ty, Ptr, Name);
     877}
     878
     879LoadInst * CBuilder::CreateLoad(Value *Ptr, bool isVolatile, const Twine & Name) {
     880    CHECK_ADDRESS(Ptr);
     881    return IRBuilder<>::CreateLoad(Ptr, isVolatile, Name);
     882}
     883
     884StoreInst * CBuilder::CreateStore(Value * Val, Value * Ptr, bool isVolatile) {
     885    CHECK_ADDRESS(Ptr);
     886    return IRBuilder<>::CreateStore(Val, Ptr, isVolatile);
     887}
     888
     889#undef CHECK_ADDRESS
     890
     891#endif
     892
     893CBuilder::CBuilder(LLVMContext & C, const unsigned GeneralRegisterWidthInBits)
    753894: IRBuilder<>(C)
    754895, mCacheLineAlignment(64)
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5472 r5486  
    129129    llvm::Value * CreateMMap(llvm::Value * const addr, llvm::Value * size, llvm::Value * const prot, llvm::Value * const flags, llvm::Value * const fd, llvm::Value * const offset);
    130130
    131     llvm::Value * CheckMMapSuccess(llvm::Value * const addr);
    132 
    133131    llvm::Value * CreateMRemap(llvm::Value * addr, llvm::Value * oldSize, llvm::Value * newSize);
    134132
     
    140138    //                    void *(*start_routine)(void*), void *arg);
    141139    llvm::Value * CreatePThreadCreateCall(llvm::Value * thread, llvm::Value * attr, llvm::Function * start_routine, llvm::Value * arg);
     140
     141    //  Create a call to:  int pthread_yield(void);
     142    llvm::Value * CreatePThreadYield();
    142143   
    143144    //  Create a call to:  void pthread_exit(void *value_ptr);
     
    215216    template <typename ExternalFunctionType>
    216217    llvm::Function * LinkFunction(llvm::StringRef name, ExternalFunctionType * functionPtr) const;
     218
     219    #ifdef HAS_ADDRESS_SANITIZER
     220    virtual llvm::LoadInst * CreateLoad(llvm::Value *Ptr, const char *Name);
     221
     222    virtual llvm::LoadInst * CreateLoad(llvm::Value *Ptr, const llvm::Twine &Name = "");
     223
     224    virtual llvm::LoadInst * CreateLoad(llvm::Type *Ty, llvm::Value *Ptr, const llvm::Twine &Name = "");
     225
     226    virtual llvm::LoadInst * CreateLoad(llvm::Value *Ptr, bool isVolatile, const llvm::Twine &Name = "");
     227
     228    virtual llvm::StoreInst * CreateStore(llvm::Value *Val, llvm::Value *Ptr, bool isVolatile = false);
     229
     230    llvm::LoadInst * CreateAlignedLoad(llvm::Value *Ptr, unsigned Align, const char *Name) {
     231        llvm::LoadInst * LI = CreateLoad(Ptr, Name);
     232        LI->setAlignment(Align);
     233        return LI;
     234    }
     235
     236    llvm::LoadInst * CreateAlignedLoad(llvm::Value *Ptr, unsigned Align, const llvm::Twine &Name = "") {
     237        llvm::LoadInst * LI = CreateLoad(Ptr, Name);
     238        LI->setAlignment(Align);
     239        return LI;
     240    }
     241
     242    llvm::LoadInst * CreateAlignedLoad(llvm::Value *Ptr, unsigned Align, bool isVolatile, const llvm::Twine &Name = "") {
     243        llvm::LoadInst * LI = CreateLoad(Ptr, isVolatile, Name);
     244        LI->setAlignment(Align);
     245        return LI;
     246    }
     247
     248    llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Ptr, unsigned Align, bool isVolatile = false) {
     249        llvm::StoreInst *SI = CreateStore(Val, Ptr, isVolatile);
     250        SI->setAlignment(Align);
     251        return SI;
     252    }
     253    #endif
    217254
    218255    void setDriver(Driver * const driver) {
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp

    r5464 r5486  
    1515std::string IDISA_NVPTX20_Builder::getBuilderUniqueName() { return "NVPTX20_" + std::to_string(groupThreads);}
    1616
    17 int IDISA_NVPTX20_Builder::getGroupThreads(){
     17unsigned IDISA_NVPTX20_Builder::getGroupThreads() const{
    1818    return groupThreads;
    1919}
     
    7373        /*Type=*/carryTy,
    7474        /*isConstant=*/false,
    75         /*Linkage=*/llvm::GlobalValue::InternalLinkage,
     75        /*Linkage=*/GlobalValue::InternalLinkage,
    7676        /*Initializer=*/0,
    7777        /*Name=*/"carry",
    7878        /*InsertBefore*/nullptr,
    79         /*TLMode */llvm::GlobalValue::NotThreadLocal,
     79        /*TLMode */GlobalValue::NotThreadLocal,
    8080        /*AddressSpace*/ 3,
    8181        /*isExternallyInitialized*/false);
     
    8686        /*Type=*/bubbleTy,
    8787        /*isConstant=*/false,
    88         /*Linkage=*/llvm::GlobalValue::InternalLinkage,
     88        /*Linkage=*/GlobalValue::InternalLinkage,
    8989        /*Initializer=*/0,
    9090        /*Name=*/"bubble",
    9191        /*InsertBefore*/nullptr,
    92         /*TLMode */llvm::GlobalValue::NotThreadLocal,
     92        /*TLMode */GlobalValue::NotThreadLocal,
    9393        /*AddressSpace*/ 3,
    9494        /*isExternallyInitialized*/false);
     
    215215  Value * bubbleVal = bubbleInitVal;
    216216
    217   for (int offset=groupThreads/2; offset>0; offset=offset>>1){
     217  for (unsigned offset = groupThreads/2; offset>0; offset=offset>>1){
    218218    carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))});
    219219    carryVal = CreateOr(carryVal, CreateLoad(carryOffsetPtr));
     
    263263                             "vote.ballot.b32  $0, %p1;}";
    264264    FunctionType * AsmFnTy = FunctionType::get(int32ty, int32ty, false);
    265     llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFnTy, AsmStream, "=r,r", true, false);
    266     llvm::CallInst * result = CreateCall(IA, conv);
    267     result->addAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoUnwind);
     265    InlineAsm *IA = InlineAsm::get(AsmFnTy, AsmStream, "=r,r", true, false);
     266    CallInst * result = CreateCall(IA, conv);
     267    result->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
    268268
    269269    CreateRet(result);
     
    271271
    272272LoadInst * IDISA_NVPTX20_Builder::CreateAtomicLoadAcquire(Value * ptr) {
    273     return CreateLoad(ptr);
    274    
    275 }
     273    return CreateLoad(ptr);   
     274}
     275
    276276StoreInst * IDISA_NVPTX20_Builder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
    277277    return CreateStore(val, ptr);
     
    286286}
    287287
    288    
    289 }
     288#ifdef HAS_ADDRESS_SANITIZER
     289LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, const char * Name) {
     290    return IRBuilder<>::CreateLoad(Ptr, Name);
     291}
     292
     293LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, const Twine & Name) {
     294    return IRBuilder<>::CreateLoad(Ptr, Name);
     295}
     296
     297LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Type * Ty, Value * Ptr, const Twine & Name) {
     298    return IRBuilder<>::CreateLoad(Ty, Ptr, Name);
     299}
     300
     301LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, bool isVolatile, const Twine & Name) {
     302    return IRBuilder<>::CreateLoad(Ptr, isVolatile, Name);
     303}
     304
     305StoreInst * IDISA_NVPTX20_Builder::CreateStore(Value * Val, Value * Ptr, bool isVolatile) {
     306    return IRBuilder<>::CreateStore(Val, Ptr, isVolatile);
     307}
     308#endif
     309
     310}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.h

    r5464 r5486  
    1717    : IDISA_Builder(C, registerWidth, registerWidth, stride)
    1818    , IDISA_I64_Builder(C, registerWidth, registerWidth, stride)
    19     , groupThreads(stride/vectorWidth) {
     19    , groupThreads(stride/vectorWidth)
     20    , barrierFunc(nullptr)
     21    , tidFunc(nullptr)
     22    , mLongAdvanceFunc(nullptr)
     23    , mLongAddFunc(nullptr)
     24    , carry(nullptr)
     25    , bubble(nullptr) {
    2026
    2127    }
     
    2531    virtual std::string getBuilderUniqueName() override;
    2632
    27     int getGroupThreads();
     33    unsigned getGroupThreads() const;
    2834
    2935    void CreateBaseFunctions() override;
     
    4753    }
    4854
     55    #ifdef HAS_ADDRESS_SANITIZER
     56    llvm::LoadInst * CreateLoad(llvm::Value *Ptr, const char *Name) override;
     57
     58    llvm::LoadInst * CreateLoad(llvm::Value *Ptr, const llvm::Twine &Name = "") override;
     59
     60    llvm::LoadInst * CreateLoad(llvm::Type *Ty, llvm::Value *Ptr, const llvm::Twine &Name = "") override;
     61
     62    llvm::LoadInst * CreateLoad(llvm::Value *Ptr, bool isVolatile, const llvm::Twine &Name = "") override;
     63
     64    llvm::StoreInst * CreateStore(llvm::Value *Val, llvm::Value *Ptr, bool isVolatile = false) override;
     65    #endif
     66
    4967private:
    5068
     
    5674
    5775private:
    58     int                         groupThreads;
     76    const unsigned              groupThreads;
    5977    llvm::Function *            barrierFunc;
    6078    llvm::Function *            tidFunc;
  • icGREP/icgrep-devel/icgrep/array-test.cpp

    r5474 r5486  
    247247
    248248int main(int argc, char *argv[]) {
    249     cl::ParseCommandLineOptions(argc, argv);
     249    codegen::ParseCommandLineOptions(argc, argv);
    250250    ParabixDriver pxDriver("mp");
    251251    pipeline(pxDriver, 3);
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5474 r5486  
    137137
    138138int main(int argc, char *argv[]) {
    139     AddParabixVersionPrinter();
    140     cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&base64Options, codegen::codegen_flags()});
    141     cl::ParseCommandLineOptions(argc, argv);
     139    codegen::ParseCommandLineOptions(argc, argv, {&base64Options, codegen::codegen_flags()});
    142140
    143141    ParabixDriver pxDriver("base64");
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5474 r5486  
    580580
    581581int main(int argc, char *argv[]) {
    582 
    583     cl::ParseCommandLineOptions(argc, argv);
    584 
     582    codegen::ParseCommandLineOptions(argc, argv);
    585583    int pattern_segs = 0;
    586584    int total_len = 0;
     
    598596
    599597#ifdef CUDA_ENABLED
    600     setNVPTXOption();
    601598    if(codegen::NVPTX){
    602599
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5485 r5486  
    119119            }
    120120            const auto PTXFilename = mGrepDriver->getBuilder()->getModule()->getModuleIdentifier() + ".ptx";
    121             ulong * rslt = RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly, LFPositions, startPoints, accumBytes);
     121            RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly, LFPositions, startPoints, accumBytes);
    122122            source.close();
    123123        } catch (std::exception & e) {
  • icGREP/icgrep-devel/icgrep/grep_interface.cpp

    r5484 r5486  
    245245void InitializeCommandLineInterface(int argc, char *argv[]) {
    246246    llvm::install_fatal_error_handler(&icgrep_error_handler);
    247     AddParabixVersionPrinter();
    248 #ifndef USE_LLVM_3_6
    249     cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
    250 #endif
    251     cl::ParseCommandLineOptions(argc, argv);
     247    codegen::ParseCommandLineOptions(argc, argv, {&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
    252248    if (RecursiveFlag || DereferenceRecursiveFlag) {
    253249        DirectoriesFlag = Recurse;
  • icGREP/icgrep-devel/icgrep/icgrep-devel.config

    r5454 r5486  
    1 
     1#define HAS_ADDRESS_SANITIZER
     2#define HAVE_LIBUNWIND
     3#define STDLIB_HAS_ALIGNED_ALLOC
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5464 r5486  
    3131IR_Gen/idisa_target.cpp
    3232IR_Gen/idisa_target.h
     33IR_Gen/tracegen.cpp
    3334IR_Gen/tracegen.h
    3435kernels/alignedprint.cpp
     
    243244grep_engine.cpp
    244245grep_engine.h
     246grep_interface.cpp
     247grep_interface.h
    245248grep_type.h
    246249hrtime.h
     
    257260wc.cpp
    258261CMakeLists.txt
    259 IR_Gen/tracegen.cpp
  • icGREP/icgrep-devel/icgrep/icgrep-devel.includes

    r5474 r5486  
    22../boost/include/
    33../libllvm/include/
     4pablo
     5kernels
     6UCD
     7editd
     8IR_Gen
     9util
     10re
     11toolchain
     12pablo/passes
     13pablo/analysis
     14cc
     15pablo/optimizers
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5484 r5486  
    189189
    190190    } else {
    191        
    192         setNVPTXOption();
    193        
     191               
    194192        if (codegen::NVPTX) {
    195193            grepEngine.grepCodeGen_nvptx(REs, grep::Mode, UTF_16);
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5479 r5486  
    656656    const unsigned totalSetCount = inputSetCount + outputSetCount;
    657657    bool isDerived[totalSetCount];
    658     int itemsPerPrincipalBlock[totalSetCount];
     658    unsigned itemsPerPrincipalBlock[totalSetCount];
    659659   
    660660    for (unsigned i = 0; i < inputSetCount; i++) {
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5479 r5486  
    3333
    3434Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
    35     iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(iBuilder, self)), "StreamSetBuffer: out-of-bounds stream access");
     35    if (codegen::EnableAsserts) {
     36        Value * const count = getStreamSetCount(iBuilder, self);
     37        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
     38        Value * const cond = iBuilder->CreateICmpULT(index, count);
     39        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
     40    }
    3641    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
    3742}
    3843
    3944Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
    40     iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(iBuilder, self)), "StreamSetBuffer: out-of-bounds stream access");
     45    if (codegen::EnableAsserts) {
     46        Value * const count = getStreamSetCount(iBuilder, self);
     47        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
     48        Value * const cond = iBuilder->CreateICmpULT(index, count);
     49        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
     50    }
    4151    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
    4252}
     
    140150    Type * i8ptr = iBuilder->getInt8PtrTy();
    141151    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
    142     Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
    143152    unsigned numStreams = getType()->getArrayNumElements();
    144153    auto elemTy = getType()->getArrayElementType();
     
    149158
    150159void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
    151     Type * i8ptr = iBuilder->getInt8PtrTy();
    152     unsigned alignment = iBuilder->getBitBlockWidth() / 8;
    153     Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
    154     unsigned numStreams = getType()->getArrayNumElements();
    155     auto elemTy = getType()->getArrayElementType();
    156     unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
     160    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     161    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
     162    Constant * const blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
     163    const unsigned numStreams = getType()->getArrayNumElements();
     164    const auto elemTy = getType()->getArrayElementType();
     165    const auto fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
    157166    if (numStreams == 1) {
    158167        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
    159168        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
    160         iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), copyBytes, alignment);
    161         return;
    162     }
    163     Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
    164     Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
    165     Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
    166     Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
    167     Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
    168     iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
    169     Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
    170     Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
    171     for (unsigned strm = 0; strm < numStreams; strm++) {
    172         Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
    173         Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
    174         iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), partialCopyBytesPerStream, alignment);
    175     }
    176 }
    177 
    178 
     169        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
     170    } else {
     171        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
     172        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
     173        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
     174        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
     175        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
     176        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), blockCopyBytes, alignment);
     177        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
     178        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
     179        for (unsigned strm = 0; strm < numStreams; strm++) {
     180            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
     181            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
     182            strmTargetPtr = iBuilder->CreateBitCast(strmTargetPtr, int8PtrTy);
     183            strmSourcePtr = iBuilder->CreateBitCast(strmSourcePtr, int8PtrTy);
     184            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
     185        }
     186    }
     187}
    179188
    180189// Source File Buffer
     
    398407        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
    399408        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
     409
    400410        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
    401411        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
  • icGREP/icgrep-devel/icgrep/lz4d.cpp

    r5474 r5486  
    126126    llvm::PrettyStackTraceProgram X(argc, argv);
    127127    llvm_shutdown_obj shutdown;
    128     cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&lz4dFlags, codegen::codegen_flags()});
    129     cl::ParseCommandLineOptions(argc, argv);
     128    codegen::ParseCommandLineOptions(argc, argv, {&lz4dFlags, codegen::codegen_flags()});
    130129    std::string fileName = inputFile;
    131130    LZ4FrameDecoder lz4Frame(fileName);
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5464 r5486  
    4444
    4545inline static bool isNonAdvanceCarryGeneratingStatement(const Statement * const stmt) {
    46     switch (stmt->getClassTypeId()) {
    47         case TypeId::ScanThru:
    48         case TypeId::AdvanceThenScanThru:
    49         case TypeId::ScanTo:
    50         case TypeId::AdvanceThenScanTo:
    51         case TypeId::MatchStar:
    52             return true;
    53         default:
    54             return false;
    55     }
     46    return isa<CarryProducingStatement>(stmt) && !isa<Advance>(stmt);
    5647}
    5748
     
    6152 * @brief initializeCarryData
    6253 ** ------------------------------------------------------------------------------------------------------------- */
    63 void CarryManager::initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, PabloKernel * const kernel) {
     54void CarryManager::initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, PabloKernel * const kernel) {
    6455
    6556    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
     
    10192
    10293/** ------------------------------------------------------------------------------------------------------------- *
     94 * @brief releaseCarryData
     95 ** ------------------------------------------------------------------------------------------------------------- */
     96void CarryManager::releaseCarryData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, PabloKernel * const kernel) {
     97
     98
     99}
     100
     101/** ------------------------------------------------------------------------------------------------------------- *
    103102 * @brief initializeCodeGen
    104103 ** ------------------------------------------------------------------------------------------------------------- */
    105 void CarryManager::initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder) {
     104void CarryManager::initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    106105
    107106    assert(!mCarryMetadata.empty());
     
    132131 * @brief finalizeCodeGen
    133132 ** ------------------------------------------------------------------------------------------------------------- */
    134 void CarryManager::finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder) {
     133void CarryManager::finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    135134    if (mHasLoop) {
    136135        iBuilder->setScalarField("selector", mNextLoopSelector);
     
    152151 * @brief enterLoopScope
    153152 ** ------------------------------------------------------------------------------------------------------------- */
    154 void CarryManager::enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const PabloBlock * const scope) {
     153void CarryManager::enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
    155154    assert (scope);
    156155    assert (mHasLoop);
     
    162161 * @brief enterLoopBody
    163162 ** ------------------------------------------------------------------------------------------------------------- */
    164 void CarryManager::enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, BasicBlock * const entryBlock) {
     163void CarryManager::enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock) {
    165164    if (mCarryInfo->hasSummary()) {
    166165        Type * const carryTy = iBuilder->getBitBlockType();
     
    179178        assert (mCarryInfo->hasSummary());
    180179
     180        Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    181181        Type * const carryTy = iBuilder->getBitBlockType();
    182182        PointerType * const carryPtrTy = carryTy->getPointerTo();
     
    218218        Value * const newCapacitySize = iBuilder->CreateShl(capacitySize, 1); // x 2
    219219
    220 
    221         Value * newArray = iBuilder->CreateAlignedMalloc(newCapacitySize, iBuilder->getCacheAlignment());
    222         iBuilder->CreateMemCpy(newArray, array, capacitySize, BlockWidth);
    223         iBuilder->CreateMemZero(iBuilder->CreateGEP(newArray, capacitySize), capacitySize, BlockWidth);
    224         iBuilder->CreateFree(array);
    225         newArray = iBuilder->CreatePointerCast(newArray, array->getType());
     220        Value * newArray = iBuilder->CreateRealloc(array, newCapacitySize);
     221
     222        Value * const startNewArrayPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newArray, int8PtrTy), capacitySize);
     223        iBuilder->CreateMemZero(startNewArrayPtr, capacitySize, BlockWidth);
     224
    226225        iBuilder->CreateStore(newArray, arrayPtr);
    227226
     
    232231
    233232        Value * const summary = iBuilder->CreateLoad(summaryPtr, false);
    234         Value * newSummary = iBuilder->CreateAlignedMalloc(newSummarySize, BlockWidth);
    235         iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
    236         iBuilder->CreateMemZero(iBuilder->CreateGEP(newSummary, summarySize), iBuilder->getSize(2 * BlockWidth), BlockWidth);
    237         iBuilder->CreateFree(summary);
     233        Value * newSummary = iBuilder->CreateRealloc(summary, newSummarySize);
     234        Value * const startNewSummaryPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newArray, int8PtrTy), summarySize);
     235        iBuilder->CreateMemZero(startNewSummaryPtr, iBuilder->getSize(2 * BlockWidth), BlockWidth);
    238236
    239237        Value * ptr1 = iBuilder->CreateGEP(newSummary, summarySize);
     
    290288 * @brief leaveLoopBody
    291289 ** ------------------------------------------------------------------------------------------------------------- */
    292 void CarryManager::leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, BasicBlock * /* exitBlock */) {
     290void CarryManager::leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * /* exitBlock */) {
    293291
    294292    Type * const carryTy = iBuilder->getBitBlockType();
     
    400398 * @brief leaveLoopScope
    401399 ** ------------------------------------------------------------------------------------------------------------- */
    402 void CarryManager::leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
     400void CarryManager::leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
    403401    assert (mLoopDepth > 0);
    404402    --mLoopDepth;
     
    409407 * @brief enterIfScope
    410408 ** ------------------------------------------------------------------------------------------------------------- */
    411 void CarryManager::enterIfScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const PabloBlock * const scope) {
     409void CarryManager::enterIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
    412410    ++mIfDepth;
    413411    enterScope(iBuilder, scope);
     
    429427 * @brief generateSummaryTest
    430428 ** ------------------------------------------------------------------------------------------------------------- */
    431 Value * CarryManager::generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, Value * condition) {
     429Value * CarryManager::generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * condition) {
    432430    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
    433431        assert ("summary test was not generated" && mNextSummaryTest);
     
    442440 * @brief enterIfBody
    443441 ** ------------------------------------------------------------------------------------------------------------- */
    444 void CarryManager::enterIfBody(const std::unique_ptr<kernel::KernelBuilder> &  /* iBuilder */, BasicBlock * const entryBlock) {
     442void CarryManager::enterIfBody(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */, BasicBlock * const entryBlock) {
    445443    assert (entryBlock);
    446444}
     
    449447 * @brief leaveIfBody
    450448 ** ------------------------------------------------------------------------------------------------------------- */
    451 void CarryManager::leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, BasicBlock * const exitBlock) {
     449void CarryManager::leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const exitBlock) {
    452450    assert (exitBlock);
    453451    const auto n = mCarrySummaryStack.size();
     
    463461 * @brief leaveIfScope
    464462 ** ------------------------------------------------------------------------------------------------------------- */
    465 void CarryManager::leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
     463void CarryManager::leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
    466464    assert (mIfDepth > 0);
    467465    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
     
    487485 * @brief enterScope
    488486 ** ------------------------------------------------------------------------------------------------------------- */
    489 void CarryManager::enterScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const PabloBlock * const scope) {
     487void CarryManager::enterScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
    490488    assert (scope);
    491489    // Store the state of the current frame and update the scope state
     
    506504 * @brief leaveScope
    507505 ** ------------------------------------------------------------------------------------------------------------- */
    508 void CarryManager::leaveScope(const std::unique_ptr<kernel::KernelBuilder> &  /* iBuilder */) {
     506void CarryManager::leaveScope(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */) {
    509507
    510508    // Did we use all of the packs in this carry struct?
     
    528526 * @brief addCarryInCarryOut
    529527 ** ------------------------------------------------------------------------------------------------------------- */
    530 Value * CarryManager::addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const Statement * const operation, Value * const e1, Value * const e2) {
     528Value * CarryManager::addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Statement * const operation, Value * const e1, Value * const e2) {
    531529    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
    532530    Value * const carryIn = getNextCarryIn(iBuilder);
     
    541539 * @brief advanceCarryInCarryOut
    542540 ** ------------------------------------------------------------------------------------------------------------- */
    543 Value * CarryManager::advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const Advance * const advance, Value * const value) {
     541Value * CarryManager::advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Advance * const advance, Value * const value) {
    544542    const auto shiftAmount = advance->getAmount();
    545543    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
     
    558556 * @brief longAdvanceCarryInCarryOut
    559557 ** ------------------------------------------------------------------------------------------------------------- */
    560 inline Value * CarryManager::longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, Value * const value, const unsigned shiftAmount) {
     558inline Value * CarryManager::longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value, const unsigned shiftAmount) {
    561559
    562560    assert (mHasLongAdvance);
     
    638636 * @brief getNextCarryIn
    639637 ** ------------------------------------------------------------------------------------------------------------- */
    640 Value * CarryManager::getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder) {
     638Value * CarryManager::getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    641639    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
    642640    if (mLoopDepth == 0) {
     
    657655 * @brief setNextCarryOut
    658656 ** ------------------------------------------------------------------------------------------------------------- */
    659 void CarryManager::setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, Value * carryOut) {
     657void CarryManager::setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * carryOut) {
    660658    Type * const carryTy = iBuilder->getBitBlockType();
    661659    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
     
    679677 * @brief readCarryInSummary
    680678 ** ------------------------------------------------------------------------------------------------------------- */
    681 Value * CarryManager::readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, ConstantInt * index) const {
     679Value * CarryManager::readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, ConstantInt * index) const {
    682680    assert (mCarryInfo->hasSummary());
    683681    unsigned count = 2;
     
    711709 * @brief writeCarryOutSummary
    712710 ** ------------------------------------------------------------------------------------------------------------- */
    713 inline void CarryManager::writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, Value * const summary, ConstantInt * index) const {
     711inline void CarryManager::writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const summary, ConstantInt * index) const {
    714712    Value * ptr = nullptr;
    715713    assert (mCarryInfo->hasExplicitSummary());
     
    725723 * @brief addToCarryOutSummary
    726724 ** ------------------------------------------------------------------------------------------------------------- */
    727 inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, Value * const value) {
     725inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value) {
    728726    assert ("cannot add null summary value!" && value);   
    729727    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
     
    748746 ** ------------------------------------------------------------------------------------------------------------- */
    749747bool CarryManager::hasIterationSpecificAssignment(const PabloBlock * const scope) {
     748#if 0
     749    return dyn_cast_or_null<While>(scope->getBranch()) != nullptr;
     750#else
    750751    if (const While * const br = dyn_cast_or_null<While>(scope->getBranch())) {
    751752        for (const Var * var : br->getEscaped()) {
     
    771772    }
    772773    return false;
     774#endif
    773775}
    774776
     
    776778 * @brief analyse
    777779 ** ------------------------------------------------------------------------------------------------------------- */
    778 StructType * CarryManager::analyse(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const PabloBlock * const scope, const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
     780StructType * CarryManager::analyse(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope, const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
    779781    assert ("scope cannot be null!" && scope);
    780782    assert (mCarryScopes == 0 ? (scope == mKernel->getEntryBlock()) : (scope != mKernel->getEntryBlock()));
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.h

    r5440 r5486  
    4646    CarryManager() noexcept;
    4747
    48     void initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, PabloKernel * const kernel);
     48    void initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, PabloKernel * const kernel);
    4949
    50     void initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder);
     50    void releaseCarryData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, PabloKernel * const kernel);
    5151
    52     void finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder);
     52    void initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     53
     54    void finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    5355
    5456    /* Entering and leaving loops. */
    5557
    56     void enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const PabloBlock * const scope);
     58    void enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope);
    5759
    58     void enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::BasicBlock * const entryBlock);
     60    void enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * const entryBlock);
    5961
    60     void leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::BasicBlock * const exitBlock);
     62    void leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * const exitBlock);
    6163
    62     void leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::BasicBlock * const entryBlock, llvm::BasicBlock * const exitBlock);
     64    void leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * const entryBlock, llvm::BasicBlock * const exitBlock);
    6365
    6466    /* Entering and leaving ifs. */
    6567
    66     void enterIfScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const PabloBlock * const scope);
     68    void enterIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope);
    6769
    68     void enterIfBody(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::BasicBlock * const entryBlock);
     70    void enterIfBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * const entryBlock);
    6971
    70     void leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::BasicBlock * const exitBlock);
     72    void leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * const exitBlock);
    7173
    72     void leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::BasicBlock * const entryBlock, llvm::BasicBlock * const exitBlock);
     74    void leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * const entryBlock, llvm::BasicBlock * const exitBlock);
    7375
    7476    /* Methods for processing individual carry-generating operations. */
    7577   
    76     llvm::Value * addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const Statement * operation, llvm::Value * const e1, llvm::Value * const e2);
     78    llvm::Value * addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Statement * operation, llvm::Value * const e1, llvm::Value * const e2);
    7779
    78     llvm::Value * advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const Advance * advance, llvm::Value * const strm);
     80    llvm::Value * advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Advance * advance, llvm::Value * const strm);
    7981 
    8082    /* Methods for getting and setting carry summary values for If statements */
    8183         
    82     llvm::Value * generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::Value * condition);
     84    llvm::Value * generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::Value * condition);
    8385
    8486protected:
     
    8890    static bool hasIterationSpecificAssignment(const PabloBlock * const scope);
    8991
    90     llvm::StructType * analyse(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const PabloBlock * const scope, const unsigned ifDepth = 0, const unsigned whileDepth = 0, const bool isNestedWithinNonCarryCollapsingLoop = false);
     92    llvm::StructType * analyse(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope, const unsigned ifDepth = 0, const unsigned whileDepth = 0, const bool isNestedWithinNonCarryCollapsingLoop = false);
    9193
    9294    /* Entering and leaving scopes. */
    93     void enterScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, const PabloBlock * const scope);
    94     void leaveScope(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder);
     95    void enterScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope);
     96    void leaveScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    9597
    9698    /* Methods for processing individual carry-generating operations. */
    97     llvm::Value * getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder);
    98     void setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::Value * const carryOut);
    99     llvm::Value * longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::Value * const value, const unsigned shiftAmount);
    100     llvm::Value * readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::ConstantInt *index) const;
    101     void writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::Value * const summary, llvm::ConstantInt * index) const;
     99    llvm::Value * getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     100    void setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::Value * const carryOut);
     101    llvm::Value * longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::Value * const value, const unsigned shiftAmount);
     102    llvm::Value * readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::ConstantInt *index) const;
     103    void writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::Value * const summary, llvm::ConstantInt * index) const;
    102104
    103105    /* Summary handling routines */
    104     void addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder, llvm::Value * const value);
     106    void addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::Value * const value);
    105107
    106108private:
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/booleanreassociationpass.cpp

    r5454 r5486  
    337337            }
    338338        } else { // characterize this statement then check whether it is equivalent to any existing one.
    339             PabloAST * const folded = Simplifier::fold(stmt, block);
     339            PabloAST * const folded = Simplifier::triviallyFold(stmt, block);
    340340            if (LLVM_UNLIKELY(folded != nullptr)) {
    341341                stmt = stmt->replaceWith(folded);
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/distributivepass.cpp

    r5464 r5486  
    99#include <pablo/pe_ones.h>
    1010#include <pablo/boolean.h>
     11#include <pablo/pe_var.h>
    1112#include <boost/container/flat_set.hpp>
    1213#include <boost/container/flat_map.hpp>
     14#include <boost/range/adaptor/reversed.hpp>
    1315#include <boost/graph/adjacency_list.hpp>
    1416#include <boost/graph/topological_sort.hpp>
    1517#include <boost/function_output_iterator.hpp>
     18#include <set>
    1619
    1720#include <boost/graph/strong_components.hpp>
     
    2528using VertexData = std::pair<pablo::PabloAST *, TypeId>;
    2629
    27 using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, VertexData, pablo::PabloAST *>;
     30using Graph = adjacency_list<vecS, vecS, bidirectionalS, VertexData, pablo::PabloAST *>;
    2831using Vertex = Graph::vertex_descriptor;
    29 
    30 using DistributionGraph = adjacency_list<hash_setS, vecS, bidirectionalS, Vertex>;
    31 using DistributionVertex = DistributionGraph::vertex_descriptor;
     32using in_edge_iterator = graph_traits<Graph>::in_edge_iterator;
     33using out_edge_iterator = graph_traits<Graph>::out_edge_iterator;
    3234
    3335using VertexSet = std::vector<Vertex>;
     
    145147struct PassContainer {
    146148
     149    enum Modification {
     150        None, Trivial, Structural
     151    };
     152
    147153    /** ------------------------------------------------------------------------------------------------------------- *
    148154     * @brief run
     
    166172     * Try to eliminate some of the unnecessary operations from the current Variadic expressions
    167173     ** ------------------------------------------------------------------------------------------------------------- */
     174    void run(PabloKernel * const kernel) {
     175        run(kernel->getEntryBlock());
     176
     177        printGraph(G, "G", errs());
     178        if (simplifyGraph() == Structural) {
     179            // rewriteAST(first, stmt);
     180            printGraph(G, "O", errs());
     181        }
     182
     183    }
     184
    168185    void run(PabloBlock * const block) {
    169         Statement * stmt = block->front();
    170         // Statement * first = stmt;
    171         while (stmt) {
    172             Statement * next = stmt->getNextNode();
     186        for (Statement * stmt : *block) {           
    173187            if (isa<Branch>(stmt)) {
    174                 // addUsageInformation();
    175                 if (simplifyGraph()) {
    176                     // rewriteAST(first, stmt);
    177 
    178                     // printGraph(G, "G", errs());
    179                 }
    180 
    181 
    182 
    183                 G.clear();
    184                 M.clear();
     188                addBranch(stmt);
    185189                run(cast<Branch>(stmt)->getBody());
    186                 G.clear();
    187                 M.clear();
    188190            } else {
    189191                addStatement(stmt);
    190192            }
    191             stmt = next;
    192         }
     193        }
     194
     195//        G.clear();
     196//        M.clear();
     197//        for (Statement * stmt : *block) {
     198//            addStatement(stmt);
     199//        }
     200
     201//        printGraph(G, "G", errs());
     202//        if (simplifyGraph() == Structural) {
     203//            // rewriteAST(first, stmt);
     204//            printGraph(G, "O", errs());
     205//        }
     206
     207    }
     208
     209    /** ------------------------------------------------------------------------------------------------------------- *
     210     * @brief simplifyGraph
     211     ** ------------------------------------------------------------------------------------------------------------- */
     212    Modification simplifyGraph() {
     213        Modification modified = None;
     214        for (;;) {
     215            const auto p1 = applyAssociativeIdentityAnnulmentLaws();
     216            const auto p2 = applyAbsorbtionComplementIdempotentLaws();
     217            const auto p3 = applyDistributivityLaw();
     218            if (std::max(std::max(p1, p2), p3) != Structural) {
     219                break;
     220            }
     221            modified = Structural;
     222        }
     223        return modified;
    193224    }
    194225
    195226protected:
    196227
    197 //    /** ------------------------------------------------------------------------------------------------------------- *
    198 //     * @brief addUsageInformation
    199 //     *
    200 //     * Populate G with the user information of each statement so that we can determine whether it'll be cost effective
    201 //     * to distribute an operation.
    202 //     ** ------------------------------------------------------------------------------------------------------------- */
    203 //    void addUsageInformation() {
    204 //        for (const Vertex u : make_iterator_range(vertices(G))) {
    205 //            PabloAST * expr; TypeId typeId;
    206 //            std::tie(expr, typeId) = G[u];
    207 //            if (LLVM_LIKELY(typeId != TypeId::Var)) {
    208 //                for (PabloAST * user : expr->users()) {
    209 //                    add_edge(u, addExpression(user), user, G);
    210 //                }
    211 //            }
    212 //        }
    213 //    }
    214 
    215228    /** ------------------------------------------------------------------------------------------------------------- *
    216229     * @brief applyAssociativeIdentityAnnulmentLaws
    217230     ** ------------------------------------------------------------------------------------------------------------- */
    218     bool applyAssociativeIdentityAnnulmentLaws() {
    219 
    220         bool modified = false;
    221 
    222         std::function<void(const Vertex)> apply = [&](const Vertex u) {
    223             PabloAST * expr; TypeId typeId;
    224             std::tie(expr, typeId) = G[u];
    225 
    226 
    227 
    228             if (LLVM_UNLIKELY(typeId == TypeId::Zeroes || typeId == TypeId::Ones)) {
    229 repeat:         for (auto e : make_iterator_range(out_edges(u, G))) {
    230                     const auto v = target(e, G);
    231                     const auto targetTypeId = getType(v);
    232                     if (LLVM_UNLIKELY(isAssociative(targetTypeId))) {
    233                         if (isIdentityRelation(typeId, targetTypeId)) {
    234                             remove_edge(e, G);
    235                         } else {
    236                             setType(v, typeId == TypeId::And ? TypeId::Zeroes : TypeId::Ones);
    237                             clear_in_edges(v, G);
    238                             apply(v);
    239                         }
    240                         modified = true;
    241                         goto repeat;
     231    Modification applyAssociativeIdentityAnnulmentLaws() {
     232
     233        auto identityComparator = [this](const Vertex u, const Vertex v) -> bool {
     234            const auto typeA = getType(u);
     235            assert (typeA != TypeId::Var);
     236            const auto typeB = getType(v);
     237            assert (typeB != TypeId::Var);
     238            if (LLVM_LIKELY(typeA != typeB)) {
     239                using value_of = std::underlying_type<TypeId>::type;
     240                return static_cast<value_of>(typeA) < static_cast<value_of>(typeB);
     241            } else {
     242                const auto degA = in_degree(u, G);
     243                const auto degB = in_degree(v, G);
     244                if (degA != degB) {
     245                    return degA < degB;
     246                } else {
     247                    Vertex adjA[degA];
     248                    Vertex adjB[degA];
     249                    in_edge_iterator ei, ej;
     250                    std::tie(ei, std::ignore) = in_edges(u, G);
     251                    std::tie(ej, std::ignore) = in_edges(v, G);
     252                    for (size_t i = 0; i < degA; ++i, ++ei, ++ej) {
     253                        adjA[i] = source(*ei, G);
     254                        adjB[i] = source(*ej, G);
     255                    }
     256                    std::sort(adjA, adjA + degA);
     257                    std::sort(adjB, adjB + degA);
     258                    for (size_t i = 0; i < degA; ++i) {
     259                        if (adjA[i] < adjB[i]) {
     260                            return true;
     261                        }
     262                    }
     263                    return false;
     264                }
     265            }
     266        };
     267
     268        flat_set<Vertex, decltype(identityComparator)> V(identityComparator);
     269        V.reserve(num_vertices(G));
     270
     271        VertexSet ordering;
     272        ordering.reserve(num_vertices(G));
     273
     274        topological_sort(G, std::back_inserter(ordering)); // note: ordering is in reverse topological order
     275
     276        Modification modified = None;
     277
     278        for (const auto u : boost::adaptors::reverse(ordering)) {
     279            const TypeId typeId = getType(u);
     280            if (isImmutable(typeId)) {
     281                continue;
     282            } else if (LLVM_UNLIKELY(typeId == TypeId::Zeroes || typeId == TypeId::Ones)) {
     283                for(;;) {
     284                    bool done = true;
     285                    for (auto e : make_iterator_range(out_edges(u, G))) {
     286                        const auto v = target(e, G);
     287                        const auto targetTypeId = getType(v);
     288                        if (LLVM_UNLIKELY(isAssociative(targetTypeId))) {
     289
     290                            errs() << " -- identity relationship\n";
     291
     292                            if (isIdentityRelation(typeId, targetTypeId)) {
     293                                remove_edge(e, G);
     294                            } else {
     295                                setType(v, typeId == TypeId::And ? TypeId::Zeroes : TypeId::Ones);
     296                                clear_in_edges(v, G);
     297                            }
     298                            done = false;
     299                            modified = Structural;
     300                            break;
     301                        }
     302                    }
     303                    if (done) {
     304                        break;
    242305                    }
    243306                }
    244307            } else if (isAssociative(typeId)) {
    245 
    246                 bool contractable = true;
    247                 // an associative operation with only one element is always equivalent to the element
    248                 if (LLVM_LIKELY(in_degree(u, G) != 1)) {
    249                     for (auto e : make_iterator_range(out_edges(u, G))) {
    250                         if (LLVM_LIKELY(typeId != getType(target(e, G)))) {
    251                             contractable = false;
    252                             break;
    253                         }
    254                     }
    255                 }
    256                 if (LLVM_UNLIKELY(contractable)) {
    257                     for (auto ei : make_iterator_range(in_edges(u, G))) {
    258                         for (auto ej : make_iterator_range(out_edges(u, G))) {
    259                             add_edge(source(ei, G), target(ej, G), expr, G);
    260                         }
    261                     }
    262                     clear_vertex(u, G);
    263                     setType(u, TypeId::Var);
    264                     modified = true;
    265                 }
    266             }
    267         };
    268 
    269         // note: calls "apply" on each vertex in reverse topological order
    270         topological_sort(G, boost::make_function_output_iterator(apply));
    271 
     308                if (LLVM_UNLIKELY(in_degree(u, G) == 0)) {
     309                    setType(u, TypeId::Zeroes);
     310                } else {
     311                    // An associative operation with only one element is always equivalent to the element
     312                    bool contractable = true;
     313                    if (LLVM_LIKELY(in_degree(u, G) > 1)) {
     314                        for (auto e : make_iterator_range(out_edges(u, G))) {
     315                            if (LLVM_LIKELY(typeId != getType(target(e, G)))) {
     316                                contractable = false;
     317                                break;
     318                            }
     319                        }
     320                    }
     321                    if (LLVM_UNLIKELY(contractable)) {
     322                        for (auto ei : make_iterator_range(in_edges(u, G))) {
     323                            for (auto ej : make_iterator_range(out_edges(u, G))) {
     324                                addEdge(source(ei, G), target(ej, G), G[ei]);
     325                            }
     326                        }
     327                        removeVertex(u);
     328                        modified = std::max(modified, Trivial);
     329                        continue;
     330                    }
     331
     332                    if (LLVM_UNLIKELY(typeId == TypeId::Xor)) {
     333                        // TODO:: (A ⊕ ¬B) = (A ⊕ (B ⊕ 1)) = ¬(A ⊕ B)
     334
     335                    }
     336
     337
     338
     339                }
     340            }
     341
     342            assert (getType(u) != TypeId::Var);
     343
     344            const auto f = V.insert(u);
     345            if (LLVM_UNLIKELY(!f.second)) {
     346                const auto v = *f.first;
     347
     348                errs() << " -- replacing " << u << " with " << v << "\n";
     349
     350                for (auto e : make_iterator_range(out_edges(u, G))) {
     351                    addEdge(v, target(e, G), G[e]);
     352                }
     353                removeVertex(u);
     354                modified = Structural;
     355            }
     356        }
    272357        return modified;
    273358    }
     
    276361     * @brief applyAbsorbtionComplementIdempotentLaws
    277362     ** ------------------------------------------------------------------------------------------------------------- */
    278     bool applyAbsorbtionComplementIdempotentLaws() {
    279         using in_edge_iterator = graph_traits<Graph>::in_edge_iterator;
    280         bool modified = false;
     363    Modification applyAbsorbtionComplementIdempotentLaws() {
     364        Modification modified = None;
    281365        for (const Vertex u : make_iterator_range(vertices(G))) {
    282366            const TypeId typeId = getType(u);
     
    293377                            for (auto ek = ek_begin; ek != ek_end; ++ek) {
    294378                                if (LLVM_UNLIKELY(source(*ej, G) == source(*ek, G))) {
     379                                    modified = Structural;
    295380                                    if (LLVM_UNLIKELY(innerTypeId == TypeId::Not)) {
    296381                                        // complement
     
    305390                                            // absorbtion
    306391                                            remove_edge(*ej, G);
    307                                         }
    308                                         modified = true;
     392                                        }                                       
    309393                                        // this seldom occurs so if it does, just restart the process rather than
    310394                                        // trying to keep the iterators valid.
     
    322406    }
    323407
    324 
    325     /** ------------------------------------------------------------------------------------------------------------- *
    326      * @brief contractGraph
    327      *
    328      * This function scans through a scope block and computes a DAG G in which any sequences of AND, OR or XOR functions
    329      * are "flattened" (i.e., allowed to have any number of inputs.)
    330      ** ------------------------------------------------------------------------------------------------------------- */
    331     bool contractGraph() {
    332         bool modified = false;
    333         bool alreadyGoneOnce = false;
    334         for (;;) {
    335             if (applyAssociativeIdentityAnnulmentLaws()) {
    336                 modified = true;
    337             } else if (alreadyGoneOnce) {
    338                 break;
    339             }
    340             if (applyAbsorbtionComplementIdempotentLaws()) {
    341                 modified = true;
    342             } else { // if (alreadyGoneOnce) {
    343                 break;
    344             }
    345             alreadyGoneOnce = true;
    346         }
    347         return modified;
    348     }
    349 
    350     /** ------------------------------------------------------------------------------------------------------------- *
    351      * @brief addVertex
    352      ** ------------------------------------------------------------------------------------------------------------- */
    353     DistributionVertex addVertex(const Vertex u) {
    354         const auto f = D.find(u);
    355         if (LLVM_LIKELY(f != D.end())) {
    356             return f->second;
    357         }
    358         const auto v = add_vertex(u, H);
    359         D.emplace(u, v);
    360         return v;
    361     }
    362 
    363     /** ------------------------------------------------------------------------------------------------------------- *
    364      * @brief generateDistributionGraph
     408    /** ------------------------------------------------------------------------------------------------------------- *
     409     * @brief identifyDistributableVertices
    365410     *
    366411     * Let (u) ∈ V(G) be a conjunction ∧ or disjunction √ and (v) be a ∧ or √ and the opposite type of (u). If (u,v) ∈
     
    375420     *
    376421     ** ------------------------------------------------------------------------------------------------------------- */
    377     void generateDistributionGraph() {
    378 
    379         assert (D.empty());
    380 
    381         flat_set<Vertex> distributable;
    382         flat_set<Vertex> observed;
     422    void identifyDistributableVertices() {
     423
     424        assert (D.empty() && L.empty());
    383425
    384426        for (const Vertex u : make_iterator_range(vertices(G))) {
    385427            const TypeId outerTypeId = getType(u);
    386428            if (isDistributive(outerTypeId)) {
     429                bool beneficial = true;
    387430                const TypeId innerTypeId = oppositeTypeId(outerTypeId);
    388                 for (auto e : make_iterator_range(in_edges(u, G))) {
    389                     const Vertex v = source(e, G);
    390                     if (LLVM_UNLIKELY(std::get<1>(G[v]) == innerTypeId)) {
    391                         bool beneficial = true;
    392                         for (const auto e : make_iterator_range(out_edges(v, G))) {
    393                             if (std::get<1>(G[target(e, G)]) != outerTypeId) {
    394                                 beneficial = false;
    395                                 break;
    396                             }
    397                         }
    398                         if (beneficial) {
    399                             distributable.insert(v);
    400                         }
    401                     }
    402                 }
    403                 if (LLVM_LIKELY(distributable.size() > 1)) {
    404                     for (const Vertex v : distributable) {
    405                         for (auto e : make_iterator_range(in_edges(v, G))) {
    406                             observed.insert(source(e, G));
    407                         }
    408                     }
    409                     for (const Vertex w : observed) {
    410                         for (auto e : make_iterator_range(out_edges(w, G))) {
    411                             const Vertex v = target(e, G);
    412                             if (distributable.count(v)) {
    413                                 const Vertex y = addVertex(v);
    414                                 boost::add_edge(y, addVertex(u), H);
    415                                 boost::add_edge(addVertex(w), y, H);
    416                             }
    417                         }
    418                     }
    419                     observed.clear();
    420                 }
    421                 distributable.clear();
     431                for (auto e : make_iterator_range(out_edges(u, G))) {
     432                    const Vertex v = target(e, G);
     433                    if (LLVM_UNLIKELY(getType(v) != innerTypeId)) {
     434                        beneficial = false;
     435                        break;
     436                    }
     437                }
     438                if (beneficial) {
     439                    for (auto e : make_iterator_range(out_edges(u, G))) {
     440                        const auto v = target(e, G);
     441                        const auto f = std::lower_bound(D.begin(), D.end(), v);
     442                        if (f == D.end() || *f != v) {
     443                            D.insert(f, v);
     444                            assert (std::is_sorted(D.begin(), D.end()));
     445                        }
     446                    }
     447                    for (auto e : make_iterator_range(in_edges(u, G))) {
     448                        const auto v = source(e, G);
     449                        const auto f = std::lower_bound(L.begin(), L.end(), v);
     450                        if (f == L.end() || *f != v) {
     451                            L.insert(f, v);
     452                            assert (std::is_sorted(L.begin(), L.end()));
     453                        }
     454                    }
     455                }
     456            }
     457        }
     458
     459        // D = D - L
     460
     461        if (!L.empty()) {
     462            if (!D.empty()) {
     463                auto di = D.begin(), li = L.begin(), li_end = L.end();
     464                for (;;) {
     465                    if (*li < *di) {
     466                        if (++li == li_end) {
     467                            break;
     468                        }
     469                    } else {
     470                        if (*di < *li) {
     471                            ++di;
     472                        } else {
     473                            di = D.erase(di);
     474                        }
     475                        if (di == D.end()) {
     476                            break;
     477                        }
     478                    }
     479                }
     480            }
     481            L.clear();
     482        }
     483    }
     484
     485    /** ------------------------------------------------------------------------------------------------------------- *
     486     * @brief applyDistributivityLaw
     487     ** ------------------------------------------------------------------------------------------------------------- */
     488    Modification applyDistributivityLaw() {
     489
     490        identifyDistributableVertices();
     491
     492        // If we found no potential opportunities then we cannot apply the distribution law to any part of G.
     493        if (D.empty()) {
     494            return None;
     495        }
     496
     497        Modification modified = None;
     498
     499        const auto lowerSet = independentCliqueSets<1>(removeUnhelpfulBicliques(enumerateBicliques(D)), 1);
     500
     501        for (auto & lower : lowerSet) {
     502            const auto upperSet = independentCliqueSets<0>(enumerateBicliques(std::get<1>(lower)), 2);
     503            for (const auto & upper : upperSet) {
     504
     505                const auto & sources = std::get<1>(upper);
     506                const auto & intermediary = std::get<0>(upper);
     507                const auto & sinks = std::get<0>(lower);
     508
     509
     510
     511                const auto outerTypeId = getType(sinks.front());
     512                const auto innerTypeId = oppositeTypeId(outerTypeId);
     513
     514                errs() << " -- distributing\n";
     515
     516                // Update G to match the desired change
     517                const auto x = makeVertex(outerTypeId);
     518                const auto y = makeVertex(innerTypeId);
     519
     520                for (const auto i : intermediary) {
     521                    assert (getType(i) == innerTypeId);
     522                    for (const Vertex t : sinks) {
     523                        assert (getType(t) == outerTypeId);
     524                        remove_edge(i, t, G);
     525                    }
     526                    addEdge(i, x);
     527                }
     528
     529                for (const Vertex s : sources) {
     530                    for (const Vertex i : intermediary) {
     531                        remove_edge(s, i, G);
     532                    }
     533                    addEdge(s, y);
     534                }
     535                addEdge(x, y);
     536
     537                for (const Vertex t : sinks) {
     538                    addEdge(y, t, std::get<0>(G[t]));
     539                }
     540
     541                modified = Structural;
    422542            }
    423543        }
    424544
    425545        D.clear();
    426     }
    427 
    428 
    429     /** ------------------------------------------------------------------------------------------------------------- *
    430      * @brief redistributeAST
    431      *
    432      * Apply the distribution law to reduce computations whenever possible.
    433      ** ------------------------------------------------------------------------------------------------------------- */
    434     bool simplifyGraph() {
    435 
    436         VertexSet sinks;
    437 
    438         bool modified = false;
    439 
    440         for (;;) {
    441 
    442             assert (num_vertices(H) == 0 && num_edges(H) == 0);
    443 
    444             if (contractGraph()) {
    445                 modified = true;
    446             }
    447 
    448             generateDistributionGraph();
    449 
    450             // If we found no potential opportunities then we cannot apply the distribution law to any part of G.
    451             if (num_vertices(H) == 0) {
    452                 break;
    453             }
    454 
    455             for (const Vertex u : make_iterator_range(vertices(H))) {
    456                 if (out_degree(u, H) == 0 && in_degree(u, H) != 0) {
    457                     sinks.push_back(u);
    458                 }
    459             }
    460             std::sort(sinks.begin(), sinks.end());
    461 
    462             bool done = true;
    463 
    464             const auto lowerSet = independentCliqueSets<1>(removeUnhelpfulBicliques(enumerateBicliques(sinks)), 1);
    465 
    466             for (auto & lower : lowerSet) {
    467                 const auto upperSet = independentCliqueSets<0>(enumerateBicliques(std::get<1>(lower)), 2);
    468                 for (const auto & upper : upperSet) {
    469 
    470                     const auto & sources = std::get<1>(upper);
    471                     const auto & intermediary = std::get<0>(upper);
    472                     const auto & sinks = std::get<0>(lower);
    473 
    474                     const auto outerTypeId = getType(H[sinks.front()]);
    475                     const auto innerTypeId = oppositeTypeId(outerTypeId);
    476 
    477                     // Update G to match the desired change
    478                     const auto x = makeVertex(outerTypeId);
    479                     const auto y = makeVertex(innerTypeId);
    480 
    481                     for (const auto i : intermediary) {
    482                         const auto u = H[i];
    483                         assert (getType(u) == innerTypeId);
    484                         for (const Vertex t : sinks) {
    485                             assert (getType(H[t]) == outerTypeId);
    486                             remove_edge(u, H[t], G);
    487                         }
    488                         add_edge(u, x, nullptr, G);
    489                     }
    490 
    491                     for (const Vertex s : sources) {
    492                         const auto u = H[s];
    493                         for (const Vertex i : intermediary) {
    494                             remove_edge(u, H[i], G);
    495                         }
    496                         add_edge(u, y, nullptr, G);
    497                     }
    498                     add_edge(x, y, nullptr, G);
    499 
    500                     for (const Vertex t : sinks) {
    501                         const auto v = H[t];
    502                         add_edge(y, v, std::get<0>(G[v]), G);
    503                     }
    504 
    505                     done = false;
    506                 }
    507             }
    508 
    509             H.clear();
    510 
    511             if (done) {
    512                 break;
    513             } else {
    514                 sinks.clear();
    515                 modified = true;
    516             }
    517         }
    518 
    519         assert (num_vertices(H) == 0 && num_edges(H) == 0);
    520546
    521547        return modified;
     
    536562        IntersectionSets B1;
    537563        for (auto u : A) {
    538             if (in_degree(u, H) > 0) {
     564            if (in_degree(u, G) > 0) {
    539565                VertexSet incomingAdjacencies;
    540                 incomingAdjacencies.reserve(in_degree(u, H));
    541                 for (auto e : make_iterator_range(in_edges(u, H))) {
    542                     incomingAdjacencies.push_back(source(e, H));
     566                incomingAdjacencies.reserve(in_degree(u, G));
     567                for (auto e : make_iterator_range(in_edges(u, G))) {
     568                    incomingAdjacencies.push_back(source(e, G));
    543569                }
    544570                std::sort(incomingAdjacencies.begin(), incomingAdjacencies.end());
     
    590616            for (const Vertex u : *Bi) {
    591617                VertexSet Aj;
    592                 Aj.reserve(out_degree(u, H));
    593                 for (auto e : make_iterator_range(out_edges(u, H))) {
    594                     Aj.push_back(target(e, H));
     618                Aj.reserve(out_degree(u, G));
     619                for (auto e : make_iterator_range(out_edges(u, G))) {
     620                    Aj.push_back(target(e, G));
    595621                }
    596622                std::sort(Aj.begin(), Aj.end());
     
    673699            VertexSet & B = std::get<1>(*ci);
    674700            for (auto bi = B.begin(); bi != B.end(); ) {
    675                 if (out_degree(H[*bi], G) == cardinalityA) {
     701                if (out_degree(*bi, G) == cardinalityA) {
    676702                    ++bi;
    677703                } else {
     
    689715
    690716    /** ------------------------------------------------------------------------------------------------------------- *
    691      * @brief addTemporary
     717     * @brief makeVertex
    692718     ** ------------------------------------------------------------------------------------------------------------- */
    693719    Vertex makeVertex(const TypeId typeId, PabloAST * const expr = nullptr) {
     
    711737        const auto u = makeVertex(typeId, expr);
    712738        M.emplace(expr, u);
     739        if (LLVM_UNLIKELY(isa<Not>(expr))) {
     740            PabloAST * const negated = cast<Not>(expr)->getExpr();
     741            addEdge(addExpression(negated), u, negated);
     742        }
    713743        return u;
    714744    }
     
    717747     * @brief addStatement
    718748     ** ------------------------------------------------------------------------------------------------------------- */
    719     void addStatement(Statement * const stmt) {
     749    Vertex addStatement(Statement * const stmt) {
    720750        assert (M.count(stmt) == 0);
    721751        const auto typeId = stmt->getClassTypeId();
    722         const auto u = makeVertex(typeId, stmt);
    723         M.emplace(stmt, u);
    724         for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
    725             PabloAST * const op = stmt->getOperand(i);
    726             if (LLVM_UNLIKELY(isa<Integer>(op) || isa<String>(op))) {
    727                 continue;
    728             }
    729             const auto v = addExpression(op);
    730             add_edge(v, u, op, G);
    731             if (LLVM_UNLIKELY(isa<Not>(op))) {
    732                 PabloAST * const negated = cast<Not>(op)->getExpr();
    733                 add_edge(addExpression(negated), v, negated, G);
    734             }
    735         }
     752        if (LLVM_UNLIKELY(typeId == TypeId::Sel)) {
     753
     754            // expand Sel (C,T,F) statements into (C ∧ T) √ (C ∧ F)
     755
     756            const auto c = addExpression(cast<Sel>(stmt)->getCondition());
     757            const auto t = addExpression(cast<Sel>(stmt)->getTrueExpr());
     758            const auto l = makeVertex(TypeId::And);
     759            addEdge(c, l);
     760            addEdge(t, l);
     761            const auto n = makeVertex(TypeId::Not);
     762            addEdge(c, n);
     763            const auto r = makeVertex(TypeId::And);
     764            const auto f = addExpression(cast<Sel>(stmt)->getFalseExpr());
     765            addEdge(n, r);
     766            addEdge(f, r);
     767            const auto u = makeVertex(TypeId::Or, stmt);
     768            M.emplace(stmt, u);
     769            addEdge(l, u);
     770            addEdge(r, u);
     771
     772            return u;
     773
     774        } else {
     775
     776            const auto u = makeVertex(typeId, stmt);
     777            M.emplace(stmt, u);
     778            for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     779                PabloAST * const op = stmt->getOperand(i);
     780                if (LLVM_UNLIKELY(isa<String>(op))) {
     781                    continue;
     782                }
     783                addEdge(addExpression(op), u, op);
     784            }
     785
     786            return u;
     787        }
     788
     789    }
     790
     791    /** ------------------------------------------------------------------------------------------------------------- *
     792     * @brief addBranch
     793     ** ------------------------------------------------------------------------------------------------------------- */
     794    void addBranch(Statement * const br) {
     795        const auto u = addStatement(br);
     796        for (auto escaped : cast<Branch>(br)->getEscaped()) {
     797            addEdge(u, addExpression(escaped), escaped);
     798        }
     799    }
     800
     801
     802    /** ------------------------------------------------------------------------------------------------------------- *
     803     * @brief addEdge
     804     ** ------------------------------------------------------------------------------------------------------------- */
     805    void addEdge(const Vertex u, const Vertex v, PabloAST * const value = nullptr) {
     806        const auto typeId = getType(v);
     807        if (isAssociative(typeId)) {
     808            for (auto e : make_iterator_range(in_edges(u, G))) {
     809                if (LLVM_UNLIKELY(source(e, G) == u)) {
     810                    if (LLVM_LIKELY(isDistributive(typeId))) {
     811                        G[e] = std::max(G[e], value);
     812                    } else {
     813                        remove_edge(e, G);
     814                    }
     815                    return;
     816                }
     817            }
     818        }
     819        boost::add_edge(u, v, value, G);
     820    }
     821
     822    /** ------------------------------------------------------------------------------------------------------------- *
     823     * @brief removeVertex
     824     ** ------------------------------------------------------------------------------------------------------------- */
     825    void removeVertex(const Vertex u) {
     826        clear_vertex(u, G);
     827        setType(u, TypeId::Var);
    736828    }
    737829
     
    775867    }
    776868
     869    static bool isImmutable(const TypeId typeId) {
     870        return (typeId == TypeId::Var || typeId == TypeId::Assign || typeId == TypeId::Extract);
     871    }
     872
    777873    static TypeId oppositeTypeId(const TypeId typeId) {
    778874        assert (isDistributive(typeId));
     
    780876    }
    781877
    782     void add_edge(const Vertex u, const Vertex v, PabloAST * const value, Graph & G) {
    783         G[std::get<0>(boost::add_edge(u, v, G))] = value;
    784     }
    785 
    786878private:
    787879
    788880    Graph G;
    789881    flat_map<pablo::PabloAST *, Vertex> M;
    790 
    791     DistributionGraph H;
    792     flat_map<Vertex, DistributionVertex> D;
     882    VertexSet D;
     883    VertexSet L;
    793884
    794885};
     
    798889 ** ------------------------------------------------------------------------------------------------------------- */
    799890bool DistributivePass::optimize(PabloKernel * const kernel) {
     891    #ifdef NDEBUG
     892    report_fatal_error("DistributivePass is unsupported");
     893    #endif
    800894    PassContainer C;
    801     C.run(kernel->getEntryBlock());
     895    C.run(kernel);
    802896    return true;
    803897}
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp

    r5454 r5486  
    2929/** ------------------------------------------------------------------------------------------------------------- *
    3030 * @brief fold
    31  *
    32  * Note: if folding alters this variadic without making any other modification to the AST, it will return null as
    33  * if no change was made.
    34  ** ------------------------------------------------------------------------------------------------------------- */
    35 inline PabloAST * Simplifier::fold(Variadic * var, PabloBlock * const block) {
    36 
    37     assert (var);
    38 
    39     bool negated = false;
    40     if (LLVM_UNLIKELY(isa<Xor>(var))) {
    41         for (unsigned i = 0; i != var->getNumOperands(); ++i) {
    42             if (isa<Not>(var->getOperand(i))) {
    43                 // (A ⊕ ¬B) = (A ⊕ (B ⊕ 1)) = ¬(A ⊕ B)
    44                 var->setOperand(i, cast<Not>(var->getOperand(i))->getOperand(0));
    45                 negated = !negated;
    46             }
    47         }
    48     }
    49 
    50     // Ensure all operands of a reassociatiable function are consistently ordered.
    51     std::sort(var->begin(), var->end());
    52 
    53     // Apply the idempotence law to any And and Or statement and the identity law to any Xor
    54     for (int i = var->getNumOperands() - 1; i > 0; --i) {
    55         if (LLVM_UNLIKELY(equals(var->getOperand(i), var->getOperand(i - 1)))) {
    56             var->removeOperand(i);
    57             if (LLVM_UNLIKELY(isa<Xor>(var))) {
    58                 var->removeOperand(--i);
    59             }
    60         }
    61     }
    62 
    63     // Apply the annihilator and identity laws
    64     for (unsigned i = 0; i != var->getNumOperands(); ) {
    65         if (LLVM_UNLIKELY(isa<Zeroes>(var->getOperand(i)))) {
    66             if (LLVM_UNLIKELY(isa<And>(var))) {
    67                 return block->createZeroes(var->getType());
    68             }
    69             var->removeOperand(i);
    70             continue;
    71         } else if (LLVM_UNLIKELY(isa<Ones>(var->getOperand(i)))) {
    72             if (LLVM_UNLIKELY(isa<Or>(var))) {
    73                 return block->createOnes(var->getType());
    74             } else if (LLVM_UNLIKELY(isa<Xor>(var))) {
    75                 negated = !negated;
    76             }
    77             var->removeOperand(i);
    78             continue;
    79         }
    80         ++i;
    81     }
    82 
    83     PabloAST * replacement = nullptr;
    84 
    85     if (LLVM_LIKELY(isa<And>(var) || isa<Or>(var))) {
    86         // Apply an implicit distribution + identity law whenever possible
    87         //    (P ∧ Q) √ (P ∧ ¬Q) = P √ (Q ∧ ¬Q) ⇔ P
    88         TypeId typeId = isa<And>(var) ? TypeId::Or : TypeId::And;
    89         for (unsigned i = 0; i < var->getNumOperands(); ++i) {
    90             if (var->getOperand(i)->getClassTypeId() == typeId) {
    91                 Variadic * const Vi = cast<Variadic>(var->getOperand(i));
    92                 // Ensure the i-th operand is sorted incase it was altered after being folded.
    93                 std::sort(Vi->begin(), Vi->end());
    94                 for (unsigned j = 0; j < i; ++j) {
    95                     assert (var->getOperand(i) == Vi);
    96                     if (var->getOperand(j)->getClassTypeId() == typeId) {
    97                         Variadic * const Vj = cast<Variadic>(var->getOperand(j));
    98                         assert (std::is_sorted(Vj->begin(), Vj->end()));
    99                         if (Vi->getNumOperands() == Vj->getNumOperands()) {
    100                             // If vi and vj differ by precisely one operand, say di and dj,
    101                             // and di ⇔ ¬dj, we can apply this rule.
    102                             unsigned vi = 0, vj = 0;
    103                             const unsigned operands = Vi->getNumOperands();
    104                             unsigned di = operands - 1, dj = operands - 1;
    105                             bool differsByOne = true;
    106                             while (vi < operands && vj < operands) {
    107                                 if (Vi->getOperand(vi) < Vj->getOperand(vj)) {
    108                                     if (LLVM_UNLIKELY(di != (operands - 1))) { // <- we want the branch predictor to fail only once
    109                                         differsByOne = false;
    110                                         break;
    111                                     }
    112                                     di = vi++;
    113                                 } else if (Vj->getOperand(vj) < Vi->getOperand(vi)) {
    114                                     if (LLVM_UNLIKELY(dj != (operands - 1))) {
    115                                         differsByOne = false;
    116                                         break;
    117                                     }
    118                                     dj = vj++;
    119                                 } else {
    120                                     ++vi;
    121                                     ++vj;
    122                                 }
    123                             }
    124                             if (LLVM_UNLIKELY(differsByOne)) {
    125                                 assert (di < operands && dj < operands);
    126                                 assert ("Found an equivalent set of operations that was not deduced earlier!" && (!equals(Vi, Vj)));
    127                                 // test if di ⇔ ¬dj
    128                                 bool apply = false;
    129                                 if (isa<Not>(Vi->getOperand(di))) {
    130                                     apply = cast<Not>(Vi->getOperand(di))->getOperand(0) == Vj->getOperand(dj);
    131                                 } else if (isa<Not>(Vj->getOperand(dj))) {
    132                                     apply = cast<Not>(Vj->getOperand(dj))->getOperand(0) == Vi->getOperand(di);
    133                                 }
    134                                 if (LLVM_UNLIKELY(apply)) {
    135                                     // Although we can apply this transformation, we have a potential problem. If P is not a "literal", we
    136                                     // cannot optimize var without creating a new And/Or statement. However, the redundancy elimination
    137                                     // pass will miss this new statement unless we mark "var" as its own replacement. We'll end up checking
    138                                     // "var" again but termination is still guaranteed once none of the new statements can be replaced by
    139                                     // prior statements in the AST.
    140                                     PabloAST * expr = nullptr;
    141                                     if (operands == 2) {
    142                                         expr = Vi->getOperand(1 ^ di);
    143                                         if (LLVM_LIKELY(var->getNumOperands() == 2)) {
    144                                             return expr;
    145                                         }
    146                                     } else { // if (operands > 2) {
    147                                         assert (operands > 2);
    148                                         block->setInsertPoint(var->getPrevNode());
    149                                         if (typeId == TypeId::And) {
    150                                             expr = block->createAnd(var->getType(), operands - 1);
    151                                         } else { // if (typeId == TypeId::Or) {
    152                                             expr = block->createOr(var->getType(), operands - 1);
    153                                         }
    154                                         for (unsigned k = 0; k != di; ++k) {
    155                                             cast<Variadic>(expr)->addOperand(Vi->getOperand(k));
    156                                         }
    157                                         for (unsigned k = di + 1; k < operands; ++k) {
    158                                             cast<Variadic>(expr)->addOperand(Vi->getOperand(k));
    159                                         }
    160                                         replacement = var;
    161                                     }
    162                                     var->removeOperand(i);
    163                                     var->removeOperand(j);
    164                                     bool unique = true;
    165                                     for (unsigned k = 0; k != var->getNumOperands(); ++k) {
    166                                         if (LLVM_UNLIKELY(equals(var->getOperand(k), expr))) {
    167                                             unique = false;
    168                                             break;
    169                                         }
    170                                     }
    171                                     if (LLVM_LIKELY(unique)) {
    172                                         var->addOperand(expr);
    173                                     }
    174                                     i -= 2;
    175                                     break; // out of for j = 0 to i - 1
    176                                 }
    177                             }
    178                         }
    179                     }
    180                 }
    181             }
    182         }
    183 
    184         // Apply the absorption law whenever possible
    185         //   P √ (P ∧ Q) ⇔ P ∧ (P √ Q) ⇔ P
    186         for (unsigned i = 1; i < var->getNumOperands(); ++i) {
    187             PabloAST * const op = var->getOperand(i);
    188             if (op->getClassTypeId() == typeId) {
    189                 Variadic * const vi = cast<Variadic>(op);
    190                 assert (std::is_sorted(vi->begin(), vi->end()));
    191                 for (unsigned j = 0; j < i; ++j) {
    192                     assert (var->getOperand(i) == vi);
    193                     if (var->getOperand(j)->getClassTypeId() == typeId) {
    194                         Variadic * const vj = cast<Variadic>(var->getOperand(j));
    195                         assert (std::is_sorted(vj->begin(), vj->end()));
    196                         if (vi->getNumOperands() < vj->getNumOperands()) {
    197                             if (LLVM_UNLIKELY(std::includes(vi->begin(), vi->end(), vj->begin(), vj->end()))) {
    198                                 var->removeOperand(i--);
    199                                 break;
    200                             }
    201                         } else { // if (vi->getNumOperands() >= vj->getNumOperands()) {
    202                             if (LLVM_UNLIKELY(std::includes(vj->begin(), vj->end(), vi->begin(), vi->end()))) {
    203                                 var->removeOperand(j--);
    204                                 --i;
    205                             }
    206                         }
    207                     }
    208                 }
    209             } else { // treat the operand as a literal
    210                 for (unsigned j = 0; j < var->getNumOperands(); ) {
    211                     if (var->getOperand(j)->getClassTypeId() == typeId) {
    212                         Variadic * const vj = cast<Variadic>(var->getOperand(j));
    213                         assert (std::is_sorted(vj->begin(), vj->end()));
    214                         if (LLVM_UNLIKELY(std::binary_search(vj->begin(), vj->end(), op))) {
    215                             var->removeOperand(j);
    216                             continue;
    217                         }
    218                     }
    219                     ++j;
    220                 }
    221             }
    222         }
    223 
    224         // Apply the complementation law whenever possible.
    225         for (unsigned i = 0; i < var->getNumOperands(); ++i) {
    226             if (isa<Not>(var->getOperand(i))) {
    227                 const PabloAST * const negated = cast<Not>(var->getOperand(i))->getOperand(0);
    228                 for (unsigned j = 0; j != var->getNumOperands(); ++j) {
    229                     if (LLVM_UNLIKELY(var->getOperand(j) == negated)) {
    230                         if (isa<And>(var)) { // (A ∧ ¬A) ∧ B ⇔ 0 for any B
    231                             return block->createZeroes(var->getType());
    232                         } else { // if (isa<Or>(var)) { // (A √ ¬A) √ B ⇔ 1 for any B
    233                             return block->createOnes(var->getType());
    234                         }
    235                     }
    236                 }
    237             }
    238         }
    239 
    240     }
    241 
    242     if (LLVM_UNLIKELY(var->getNumOperands() < 2)) {
    243         if (LLVM_UNLIKELY(var->getNumOperands() == 0)) {
    244             return block->createZeroes(var->getType());
    245         }
    246         replacement = var->getOperand(0);
    247     }
    248     if (LLVM_UNLIKELY(negated)) {
    249         assert (isa<Xor>(var));
    250         block->setInsertPoint(var);
    251         replacement = block->createNot(replacement ? replacement : var);
    252     }
    253     return replacement;
    254 }
    255 
    256 /** ------------------------------------------------------------------------------------------------------------- *
    257  * @brief fold
    258  ** ------------------------------------------------------------------------------------------------------------- */
    259 PabloAST * Simplifier::fold(Statement * stmt, PabloBlock * const block) {
    260     if (isa<Variadic>(stmt)) {
    261         return fold(cast<Variadic>(stmt), block);
    262     } else if (isa<Not>(stmt)) {
     31 ** ------------------------------------------------------------------------------------------------------------- */
     32PabloAST * Simplifier::triviallyFold(Statement * stmt, PabloBlock * const block) {
     33    if (isa<Not>(stmt)) {
    26334        PabloAST * value = stmt->getOperand(0);
    26435        if (LLVM_UNLIKELY(isa<Not>(value))) {
     
    479250            }
    480251
    481             Statement * const prior = stmt->getPrevNode();
    482             PabloAST * const folded = fold(stmt, block);
     252            PabloAST * const folded = triviallyFold(stmt, block);
    483253            if (folded) {
    484                 // If we determine we can fold this statement, go back to the original prior node of this statement.
    485                 // New statements may have been inserted after it.
    486                 stmt->replaceWith(folded, true);
    487                 stmt = LLVM_LIKELY(prior != nullptr) ? prior->getNextNode() : block->front();
    488                 continue;
    489             }
     254                stmt = stmt->replaceWith(folded);
     255                continue;
     256            }
     257
    490258            // By recording which statements have already been seen, we can detect the redundant statements
    491259            // as any having the same type and operands. If so, we can replace its users with the prior statement.
     
    493261            const auto f = expressions.findOrAdd(stmt);
    494262            if (!f.second) {
    495                 stmt = stmt->replaceWith(f.first, true);
     263                stmt = stmt->replaceWith(f.first);
    496264                continue;
    497265            }
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.hpp

    r5217 r5486  
    2020    Simplifier() = default;
    2121private:
    22     static PabloAST * fold(Variadic * var, PabloBlock * const block);
    23     static PabloAST * fold(Statement * const stmt, PabloBlock * const block);
     22    static PabloAST * triviallyFold(Statement * const stmt, PabloBlock * const block);
    2423    static void redundancyElimination(PabloBlock * const block, ExpressionTable * et = nullptr, VariableTable * const vt = nullptr);
    2524    static void deadCodeElimination(PabloKernel * const kernel);
  • icGREP/icgrep-devel/icgrep/pablo/pabloAST.h

    r5371 r5486  
    3434
    3535    using Allocator = SlabAllocator<PabloAST *>;
    36     using UserAllocator = ProxyAllocator<PabloAST *>;
    37     using Users = std::vector<PabloAST *, UserAllocator>;
     36    using Users = std::vector<PabloAST *, ProxyAllocator<PabloAST *>>;
    3837    using user_iterator = Users::iterator;
    3938    using const_user_iterator = Users::const_iterator;
     
    155154    }
    156155    bool addUser(PabloAST * const user);
     156
    157157    bool removeUser(PabloAST * const user);
    158     virtual ~PabloAST() {
    159         mUsers.clear();
    160     }       
     158
     159    virtual ~PabloAST() = default;
     160
    161161private:
    162162    const ClassTypeId       mClassTypeId;
     
    226226        return mParent;
    227227    }
    228     virtual ~Statement() {}
     228    virtual ~Statement() = default;
     229
    229230protected:
    230231
     
    283284};
    284285
     286class CarryProducingStatement : public Statement {
     287public:
     288
     289    static inline bool classof(const PabloAST * e) {
     290        switch (e->getClassTypeId()) {
     291            case PabloAST::ClassTypeId::Advance:
     292            case PabloAST::ClassTypeId::ScanThru:
     293            case PabloAST::ClassTypeId::AdvanceThenScanThru:
     294            case PabloAST::ClassTypeId::ScanTo:
     295            case PabloAST::ClassTypeId::AdvanceThenScanTo:
     296            case PabloAST::ClassTypeId::MatchStar:
     297                return true;
     298            default: return false;
     299        }
     300    }
     301    static inline bool classof(const CarryProducingStatement *) {
     302        return true;
     303    }
     304    static inline bool classof(const void *) {
     305        return false;
     306    }
     307
     308    unsigned getCarryGroup() const {
     309        return mCarryGroup;
     310    }
     311
     312    void setCarryGroup(const unsigned carryGroup) {
     313        mCarryGroup = carryGroup;
     314    }
     315
     316    virtual ~CarryProducingStatement() = default;
     317
     318protected:
     319
     320    explicit CarryProducingStatement(const ClassTypeId id, llvm::Type * const type, std::initializer_list<PabloAST *> operands, const String * const name, Allocator & allocator)
     321    : Statement(id, type, operands, name, allocator)
     322    , mCarryGroup(0) {
     323
     324    }
     325
     326    explicit CarryProducingStatement(const ClassTypeId id, llvm::Type * const type, const unsigned reserved, const String * name, Allocator & allocator)
     327    : Statement(id, type, reserved, name, allocator)
     328    , mCarryGroup(0) {
     329
     330    }
     331
     332    template<typename iterator>
     333    explicit CarryProducingStatement(const ClassTypeId id, llvm::Type * const type, iterator begin, iterator end, const String * name, Allocator & allocator)
     334    : Statement(id, type, begin, end, name, allocator)
     335    , mCarryGroup(0) {
     336
     337    }
     338
     339private:
     340
     341    unsigned mCarryGroup;
     342};
     343
     344
    285345class Variadic : public Statement {
    286346public:
     
    343403        return iterator(mOperand + mOperands);
    344404    }
     405
     406    virtual ~Variadic() = default;
    345407
    346408protected:
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5440 r5486  
    4848    examineBlock(iBuilder, mKernel->getEntryBlock());
    4949    mCarryManager->initializeCarryData(iBuilder, mKernel);
     50}
     51
     52void PabloCompiler::releaseKernelData(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder) {
     53    assert ("PabloCompiler does not have a IDISA iBuilder" && iBuilder);
     54    mCarryManager->releaseCarryData(iBuilder, mKernel);
    5055}
    5156
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r5440 r5486  
    4242    void compile(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder);
    4343
     44    void releaseKernelData(const std::unique_ptr<kernel::KernelBuilder> &  iBuilder);
     45
    4446private:
    4547
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5464 r5486  
    145145    iBuilder->setScalarField("EOFmask", iBuilder->bitblock_mask_from(remainingBytes));
    146146    CreateDoBlockMethodCall(iBuilder);
     147}
     148
     149void PabloKernel::generateFinalizeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     150    mPabloCompiler->releaseKernelData(iBuilder);
    147151}
    148152
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.h

    r5454 r5486  
    158158    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) final;
    159159
     160    void generateFinalizeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) final;
     161
    160162private:
    161163
  • icGREP/icgrep-devel/icgrep/pablo/pe_advance.h

    r5454 r5486  
    1313namespace pablo {
    1414
    15 class Advance final : public Statement {
     15class Advance final : public CarryProducingStatement {
    1616    friend class PabloBlock;
    1717public:
     
    3232protected:
    3333    Advance(PabloAST * expr, PabloAST * shiftAmount, const String * name, Allocator & allocator)
    34     : Statement(ClassTypeId::Advance, expr->getType(), {expr, shiftAmount}, name, allocator) {
     34    : CarryProducingStatement(ClassTypeId::Advance, expr->getType(), {expr, shiftAmount}, name, allocator) {
    3535        assert(llvm::isa<Integer>(shiftAmount));
    3636    }
  • icGREP/icgrep-devel/icgrep/pablo/pe_matchstar.h

    r5454 r5486  
    1212namespace pablo {
    1313
    14 class MatchStar final : public Statement {
     14class MatchStar final : public CarryProducingStatement {
    1515    friend class PabloBlock;
    1616public:
     
    3030protected:
    3131    MatchStar(PabloAST * marker,  PabloAST * cc, const String * name, Allocator & allocator)
    32     : Statement(ClassTypeId::MatchStar, marker->getType(), {marker, cc}, name, allocator) {
     32    : CarryProducingStatement(ClassTypeId::MatchStar, marker->getType(), {marker, cc}, name, allocator) {
    3333    }
    3434};
  • icGREP/icgrep-devel/icgrep/pablo/pe_scanthru.h

    r5329 r5486  
    1212namespace pablo {
    1313
    14 class ScanThru : public Statement {
     14class ScanThru : public CarryProducingStatement {
    1515    friend class PabloBlock;
    1616public:
     
    3131protected:
    3232    ScanThru(PabloAST * from, PabloAST * thru, const String * name, Allocator & allocator)
    33     : Statement(ClassTypeId::ScanThru, from->getType(), {from, thru}, name, allocator) {
     33    : CarryProducingStatement(ClassTypeId::ScanThru, from->getType(), {from, thru}, name, allocator) {
    3434
    3535    }
    3636};
    3737
    38 class ScanTo : public Statement {
     38class ScanTo : public CarryProducingStatement {
    3939    friend class PabloBlock;
    4040public:
     
    5555protected:
    5656    ScanTo(PabloAST * from, PabloAST * to, const String * name, Allocator & allocator)
    57     : Statement(ClassTypeId::ScanTo, from->getType(), {from, to}, name, allocator) {
     57    : CarryProducingStatement(ClassTypeId::ScanTo, from->getType(), {from, to}, name, allocator) {
    5858
    5959    }
    6060};
    6161
    62 class AdvanceThenScanThru : public Statement {
     62class AdvanceThenScanThru : public CarryProducingStatement {
    6363    friend class PabloBlock;
    6464public:
     
    7979protected:
    8080    AdvanceThenScanThru(PabloAST * from, PabloAST * thru, const String * name, Allocator & allocator)
    81     : Statement(ClassTypeId::AdvanceThenScanThru, from->getType(), {from, thru}, name, allocator) {
     81    : CarryProducingStatement(ClassTypeId::AdvanceThenScanThru, from->getType(), {from, thru}, name, allocator) {
    8282
    8383    }
    8484};
    8585
    86 class AdvanceThenScanTo : public Statement {
     86class AdvanceThenScanTo : public CarryProducingStatement {
    8787    friend class PabloBlock;
    8888public:
     
    103103protected:
    104104    AdvanceThenScanTo(PabloAST * from, PabloAST * to, const String * name, Allocator & allocator)
    105     : Statement(ClassTypeId::AdvanceThenScanTo, from->getType(), {from, to}, name, allocator) {
     105    : CarryProducingStatement(ClassTypeId::AdvanceThenScanTo, from->getType(), {from, to}, name, allocator) {
    106106
    107107    }
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.cpp

    r5474 r5486  
    7171    mTarget = builder.selectTarget();
    7272    if (LLVM_LIKELY(codegen::EnableObjectCache)) {
    73         if (codegen::ObjectCacheDir.empty()) {
     73        if (codegen::ObjectCacheDir) {
     74            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
     75        } else {
    7476            mCache = new ParabixObjectCache();
    75         } else {
    76             mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
    7777        }
    7878        mEngine->setObjectCache(mCache);
     
    117117        k->initializeInstance(iBuilder);
    118118    }
    119     if (codegen::pipelineParallel) {
     119    if (codegen::PipelineParallel) {
    120120        generateParallelPipeline(iBuilder, mPipeline);
    121     } else if (codegen::segmentPipelineParallel) {
     121    } else if (codegen::SegmentPipelineParallel) {
    122122        generateSegmentParallelPipeline(iBuilder, mPipeline);
    123123    } else {
    124         codegen::ThreadNum = 1;
    125124        generatePipelineLoop(iBuilder, mPipeline);
    126125    }
     
    132131Function * ParabixDriver::addLinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
    133132    assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
    134     Function * const f = cast<Function>(mod->getOrInsertFunction(name, type));
    135     mEngine->addGlobalMapping(f, functionPtr);
     133    Function * f = mod->getFunction(name);
     134    if (LLVM_UNLIKELY(f == nullptr)) {
     135        f = Function::Create(type, Function::ExternalLinkage, name, mod);
     136        mEngine->addGlobalMapping(f, functionPtr);
     137    } else if (LLVM_UNLIKELY(f->getType() != type->getPointerTo())) {
     138        report_fatal_error("Cannot link " + name + ": a function with a differing signature already exists with that name in " + mod->getName());
     139    }
    136140    return f;
    137141}
     
    142146    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
    143147    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowUnoptimizedIR))) {
    144         if (codegen::IROutputFilename.empty()) {
    145             IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    146         } else {
     148        if (codegen::IROutputFilename) {
    147149            std::error_code error;
    148150            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
     151        } else {
     152            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    149153        }
    150154        PM.add(createPrintModulePass(*IROutputStream));
     
    161165    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    162166        if (LLVM_LIKELY(IROutputStream == nullptr)) {
    163             if (codegen::IROutputFilename.empty()) {
    164                 IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    165             } else {
     167            if (codegen::IROutputFilename) {
    166168                std::error_code error;
    167169                IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
     170            } else {
     171                IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    168172            }
    169173        }
     
    174178    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
    175179    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
    176         if (codegen::ASMOutputFilename.empty()) {
    177             ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    178         } else {
     180        if (codegen::ASMOutputFilename) {
    179181            std::error_code error;
    180182            ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
     183        } else {
     184            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    181185        }
    182186        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5474 r5486  
    1111#include <boost/container/flat_set.hpp>
    1212#include <boost/container/flat_map.hpp>
     13#include <llvm/Support/CommandLine.h>
    1314#include <kernels/kernel_builder.h>
    1415
     
    1920using namespace llvm;
    2021
     22// static cl::opt<bool> UseYield("yield", cl::desc("yield after waiting"), cl::init(false));
    2123
    2224template <typename Value>
     
    105107
    106108        BasicBlock * const segmentWait = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Wait", threadFunc);
     109
     110        BasicBlock * segmentYield = segmentWait;
     111//        if (UseYield) {
     112//            segmentYield = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Yield", threadFunc);
     113//        }
     114
    107115        iBuilder->CreateBr(segmentWait);
    108116
     
    120128
    121129        if (kernel->hasNoTerminateAttribute()) {
    122             iBuilder->CreateCondBr(ready, segmentLoopBody, segmentWait);
     130            iBuilder->CreateCondBr(ready, segmentLoopBody, segmentYield);
    123131        } else { // If the kernel was terminated in a previous segment then the pipeline is done.
    124132            BasicBlock * completionTest = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Completed", threadFunc, 0);
    125133            BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Exit", threadFunc, 0);
    126             iBuilder->CreateCondBr(ready, completionTest, segmentWait);
     134            iBuilder->CreateCondBr(ready, completionTest, segmentYield);
    127135
    128136            iBuilder->SetInsertPoint(completionTest);
     
    134142            iBuilder->CreateBr(exitThreadBlock);
    135143        }
     144
     145//        if (UseYield) {
     146//            // Yield the thread after waiting
     147//            iBuilder->SetInsertPoint(segmentYield);
     148//            iBuilder->CreatePThreadYield();
     149//            iBuilder->CreateBr(segmentWait);
     150//        }
    136151
    137152        // Execute the kernel segment
     
    171186        if (codegen::EnableCycleCounter) {
    172187            cycleCountEnd = iBuilder->CreateReadCycleCounter();
    173             //Value * counterPtr = iBuilder->CreateGEP(mCycleCounts, {iBuilder->getInt32(0), iBuilder->getInt32(k)});
    174188            Value * counterPtr = iBuilder->getScalarFieldPtr(Kernel::CYCLECOUNT_SCALAR);
    175189            iBuilder->CreateStore(iBuilder->CreateAdd(iBuilder->CreateLoad(counterPtr), iBuilder->CreateSub(cycleCountEnd, cycleCountStart)), counterPtr);
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp

    r5473 r5486  
    3131                        clEnumValEnd), cl::cat(CodeGenOptions));
    3232
    33 static cl::opt<std::string> IROutputFilenameOption("dump-generated-IR-output", cl::init(""),
     33static cl::opt<const char *> IROutputFilenameOption("dump-generated-IR-output", cl::init(nullptr),
    3434                                                       cl::desc("output IR filename"), cl::cat(CodeGenOptions));
    3535
    3636#ifndef USE_LLVM_3_6
    37 static cl::opt<std::string> ASMOutputFilenameOption("asm-output", cl::init(""),
     37static cl::opt<const char *> ASMOutputFilenameOption("asm-output", cl::init(nullptr),
    3838                                                    cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
    3939
     
    4646
    4747
    48 static cl::opt<bool> EnableObjectCacheOption("enable-object-cache",
    49                                              cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
    50 
    51 static cl::opt<std::string> ObjectCacheDirOption("object-cache-dir",
    52                                                  cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
     48static cl::opt<bool, true> EnableObjectCacheOption("enable-object-cache", cl::location(EnableObjectCache), cl::init(true),
     49                                                   cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
     50
     51static cl::opt<const char *> ObjectCacheDirOption("object-cache-dir", cl::init(nullptr),
     52                                                 cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
    5353
    5454
     
    5757
    5858
    59 static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize),
    60                                             cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
     59static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::init(1),
     60                                            cl::desc("Segment Size"), cl::value_desc("positive integer"));
    6161
    6262static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::init(1),
     
    6464
    6565
    66 static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::init(2),
     66static cl::opt<int> ThreadNumOption("thread-num", cl::init(2),
    6767                                          cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"));
    6868
    6969
    7070static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::init(IN_DEBUG_MODE),
    71                                                cl::desc("Enable Asserts"));
     71                                               cl::desc("Enable Asserts"), cl::cat(CodeGenOptions));
    7272
    7373static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::init(false),
    74                                                   cl::desc("Count and report CPU cycles per kernel"), cl::cat(CodeGenOptions));
    75 
    76 static cl::opt<bool, true> pipelineParallelOption("enable-pipeline-parallel", cl::location(pipelineParallel),
     74                                             cl::desc("Count and report CPU cycles per kernel"), cl::cat(CodeGenOptions));
     75
     76static cl::opt<bool, true> pipelineParallelOption("enable-pipeline-parallel", cl::location(PipelineParallel), cl::init(false),
    7777                                                  cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
    7878   
    79 static cl::opt<bool, true> segmentPipelineParallelOption("enable-segment-pipeline-parallel", cl::location(segmentPipelineParallel),
     79static cl::opt<bool, true> segmentPipelineParallelOption("enable-segment-pipeline-parallel", cl::location(SegmentPipelineParallel),
    8080                                                         cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
    8181
    82 static cl::opt<bool> USENVPTX("NVPTX", cl::init(false),
    83                               cl::desc("Run on GPU only."));
     82static cl::opt<bool, true> NVPTXOption("NVPTX", cl::location(NVPTX), cl::init(false),
     83                                 cl::desc("Run on GPU only."), cl::cat(CodeGenOptions));
    8484
    8585static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::init(256),
    86                                          cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"));
    87 
    88 
    89 const CodeGenOpt::Level OptLevel = [](const char optLevel) {
    90     switch (optLevel) {
    91         case '0': return CodeGenOpt::None;
    92         case '1': return CodeGenOpt::Less;
    93         case '2': return CodeGenOpt::Default;
    94         case '3': return CodeGenOpt::Aggressive;
    95         default: report_fatal_error(std::string(1,optLevel) + " is an invalid optimization level.");
    96     }
    97 }(OptLevelOption);
    98 
    99 bool pipelineParallel;
    100 bool segmentPipelineParallel;
    101 const std::string ASMOutputFilename = ASMOutputFilenameOption;
    102 const std::string IROutputFilename = IROutputFilenameOption;
    103 const std::string ObjectCacheDir = ObjectCacheDirOption;
     86                                         cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::cat(CodeGenOptions));
     87
     88CodeGenOpt::Level OptLevel;
     89
     90bool PipelineParallel;
     91
     92bool SegmentPipelineParallel;
     93
     94const char * ASMOutputFilename;
     95
     96const char * IROutputFilename;
     97
     98const char * ObjectCacheDir;
     99
    104100int BlockSize;
     101
    105102int SegmentSize;
     103
    106104int BufferSegments;
     105
    107106int ThreadNum;
     107
    108108bool EnableAsserts;
     109
    109110bool EnableCycleCounter;
    110 const bool EnableObjectCache = EnableObjectCacheOption && (DebugOptions.getBits() == 0);
    111 bool NVPTX;
     111
     112bool EnableObjectCache;
     113
     114bool NVPTX = [](const bool nvptx) {
     115    #ifndef CUDA_ENABLED
     116    if (nvptx) {
     117        report_fatal_error("CUDA compiler is not supported.");
     118    }
     119    #endif
     120    return nvptx;
     121}(NVPTXOption);
     122
    112123int GroupNum;
    113124
    114125const llvm::Reloc::Model RelocModel = ::RelocModel;
     126
    115127const llvm::CodeModel::Model CMModel = ::CMModel;
     128
    116129const std::string MArch = ::MArch;
     130
    117131const std::string RunPass = ::RunPass;
     132
    118133const llvm::TargetMachine::CodeGenFileType FileType = ::FileType;
     134
    119135const std::string StopAfter = ::StopAfter;
     136
    120137const std::string StartAfter = ::StartAfter;
    121 #ifndef USE_LLVM_3_6
    122 const TargetOptions Options = [](const bool asmVerbose) {
    123     TargetOptions opt = InitTargetOptionsFromCodeGenFlags();
    124     opt.MCOptions.AsmVerbose = AsmVerbose;
    125     return opt;
    126 }(AsmVerbose);
    127 #else
    128 const TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
    129 #endif
     138
     139TargetOptions Options;
    130140
    131141const cl::OptionCategory * codegen_flags() {
     
    149159}
    150160
    151 
    152 }
    153 
    154 void setNVPTXOption() {
    155     codegen::NVPTX = codegen::USENVPTX;
    156     if (codegen::NVPTX) {
    157         #ifndef CUDA_ENABLED
     161std::string ProgramName;
     162
     163void ParseCommandLineOptions(int argc, const char * const *argv, std::initializer_list<const cl::OptionCategory *> hiding) {
     164    AddParabixVersionPrinter();
     165    codegen::ProgramName = argv[0];
     166    #ifndef USE_LLVM_3_6
     167    if (hiding.size() != 0) {
     168        cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>(hiding));
     169    }
     170    #endif
     171    cl::ParseCommandLineOptions(argc, argv);
     172    if (DebugOptions.getBits()) {
     173        EnableObjectCache = false;
     174    }
     175
     176    ThreadNum = (PipelineParallel || SegmentPipelineParallel) ? 2 : 1;
     177
     178    ObjectCacheDir = ObjectCacheDirOption;
     179    IROutputFilename = IROutputFilenameOption;
     180    ObjectCacheDir = ObjectCacheDirOption;
     181    Options = InitTargetOptionsFromCodeGenFlags();
     182    #ifndef USE_LLVM_3_6
     183    Options.MCOptions.AsmVerbose = AsmVerbose;
     184    #endif
     185    switch (OptLevelOption) {
     186        case '0': OptLevel = CodeGenOpt::None; break;
     187        case '1': OptLevel = CodeGenOpt::Less; break;
     188        case '2': OptLevel = CodeGenOpt::Default; break;
     189        case '3': OptLevel = CodeGenOpt::Aggressive; break;
     190        default: report_fatal_error(std::string(1, OptLevelOption) + " is an invalid optimization level.");
     191    }
     192    #ifndef CUDA_ENABLED
     193    if (NVPTX) {
    158194        report_fatal_error("CUDA compiler is not supported.");
    159         #endif
    160     }
     195    }
     196    #endif
     197}
     198
    161199}
    162200
    163201void printParabixVersion () {
    164     raw_ostream &OS = outs();
    165     OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
     202    outs() << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
    166203}
    167204
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.h

    r5464 r5486  
    3636bool DebugOptionIsSet(const DebugFlags flag);
    3737
    38 extern bool pipelineParallel;
    39 extern bool segmentPipelineParallel;
     38extern bool PipelineParallel;
     39extern bool SegmentPipelineParallel;
    4040#ifndef USE_LLVM_3_6
    41 extern const std::string ASMOutputFilename;
     41extern const char * ASMOutputFilename;
    4242#endif
    43 extern const std::string IROutputFilename;
    44 extern const std::string ObjectCacheDir;
    45 extern const llvm::CodeGenOpt::Level OptLevel;  // set from command line
     43extern const char * IROutputFilename;
     44extern const char * ObjectCacheDir;
     45extern llvm::CodeGenOpt::Level OptLevel;  // set from command line
    4646extern int BlockSize;  // set from command line
    4747extern int SegmentSize;  // set from command line
    4848extern int BufferSegments;
    4949extern int ThreadNum;
    50 extern const bool EnableObjectCache;
     50extern bool EnableObjectCache;
    5151extern bool EnableAsserts;
    5252extern bool EnableCycleCounter;
    5353extern bool NVPTX;
    5454extern int GroupNum;
    55 extern const llvm::TargetOptions Options;
     55extern std::string ProgramName;
     56extern llvm::TargetOptions Options;
    5657extern const llvm::Reloc::Model RelocModel;
    5758extern const llvm::CodeModel::Model CMModel;
     
    6667void setFunctionAttributes(llvm::StringRef CPU, llvm::StringRef Features, llvm::Module &M);
    6768
     69void ParseCommandLineOptions(int argc, const char *const *argv, std::initializer_list<const llvm::cl::OptionCategory *> hiding = {});
     70
    6871}
    69 
    70 
    71 void setNVPTXOption();
    7272
    7373void AddParabixVersionPrinter();
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5474 r5486  
    470470
    471471int main(int argc, char *argv[]) {
    472     AddParabixVersionPrinter();
    473     cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
    474     cl::ParseCommandLineOptions(argc, argv);
     472    codegen::ParseCommandLineOptions(argc, argv, {&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
    475473    ParabixDriver pxDriver("u8u16");
    476474    if (enableAVXdel && AVX2_available() && codegen::BlockSize==256) {
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5474 r5486  
    207207
    208208int main(int argc, char *argv[]) {
    209     AddParabixVersionPrinter();
    210     cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
    211     cl::ParseCommandLineOptions(argc, argv);
     209    codegen::ParseCommandLineOptions(argc, argv, {&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
    212210    if (wcOptions.size() == 0) {
    213211        CountLines = true;
Note: See TracChangeset for help on using the changeset viewer.