Changeset 6184


Ignore:
Timestamp:
Nov 2, 2018, 7:18:31 PM (6 months ago)
Author:
nmedfort
Message:

Initial version of PipelineKernel + revised StreamSet model.

Location:
icGREP/icgrep-devel/icgrep
Files:
26 added
5 deleted
156 edited
1 copied

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6182 r6184  
    6262SET(GREP_CORE_SRC kernels/linebreak_kernel.cpp kernels/grep_kernel.cpp kernels/scanmatchgen.cpp )
    6363
    64 SET(TOOLCHAIN_SRC toolchain/toolchain.cpp toolchain/pipeline.cpp)
     64SET(TOOLCHAIN_SRC toolchain/toolchain.cpp toolchain/object_cache_manager.cpp)
    6565
    6666SET(DRIVER_SRC toolchain/driver.cpp toolchain/cpudriver.cpp)
     
    6969endif()
    7070
    71 SET(KERNEL_SRC kernels/attributes.cpp kernels/processing_rate.cpp kernels/interface.cpp kernels/kernel.cpp kernels/multiblock_kernel.cpp kernels/block_kernel.cpp kernels/streamset.cpp kernels/kernel_builder.cpp)
     71SET(KERNEL_SRC kernels/attributes.cpp kernels/binding.cpp kernels/relationship.cpp kernels/processing_rate.cpp)
     72SET(KERNEL_SRC ${KERNEL_SRC} kernels/kernel.cpp kernels/streamset.cpp kernels/multiblock_kernel.cpp kernels/block_kernel.cpp)
     73SET(KERNEL_SRC ${KERNEL_SRC} kernels/pipeline/pipeline_kernel.cpp kernels/pipeline/pipeline_builder.cpp)
    7274SET(KERNEL_SRC ${KERNEL_SRC} kernels/source_kernel.cpp kernels/s2p_kernel.cpp kernels/deletion.cpp kernels/swizzle.cpp kernels/p2s_kernel.cpp kernels/stdout_kernel.cpp kernels/hex_convert.cpp)
     75SET(KERNEL_SRC ${KERNEL_SRC} kernels/kernel_builder.cpp)
    7376
    7477SET(IDISA_SRC IR_Gen/CBuilder.cpp IR_Gen/idisa_builder.cpp IR_Gen/idisa_avx_builder.cpp IR_Gen/idisa_i64_builder.cpp IR_Gen/idisa_sse_builder.cpp)
     
    103106add_library(UCDlib UCD/Equivalence.cpp UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    104107add_library(GrepEngine  ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
     108add_library(FileSelect util/file_select.cpp)
    105109add_library(LZ4_Lib lz4/lz4_frame_decoder.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp lz4/lz4_decompression_generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/fake_stream_generating_kernel.cpp kernels/lz4/decompression/lz4_bytestream_decompression.cpp kernels/lz4/decompression/lz4_swizzled_decompression.cpp kernels/lz4/decompression/lz4_sequential_decompression_base.cpp kernels/lz4/decompression/lz4_bitstream_decompression.cpp kernels/lz4/decompression/lz4_twist_decompression.cpp kernels/lz4/twist_kernel.cpp kernels/lz4/untwist_kernel.cpp lz4/lz4_base_generator.cpp kernels/lz4/lz4_match_detector.cpp kernels/lz4/lz4_match_detector.h kernels/lz4/lz4_not_kernel.cpp kernels/lz4/lz4_not_kernel.h)
    106110
     
    114118target_link_libraries (CCADT PabloADT)
    115119target_link_libraries (RegExpADT PabloADT CCADT)
    116 target_link_libraries (UCDlib GrepEngine RegExpCompiler RegExpADT PabloADT CCADT)
     120target_link_libraries (UCDlib RegExpADT PabloADT CCADT) # GrepEngine RegExpCompiler
    117121target_link_libraries (RegExpCompiler UCDlib CCADT RegExpADT)
    118122target_link_libraries (GrepEngine RegExpCompiler CodeGen)
    119 
    120 add_executable(icgrep icgrep.cpp util/file_select.cpp grep_interface.cpp)
     123target_link_libraries (FileSelect GrepEngine)
     124
     125add_executable(icgrep icgrep.cpp grep_interface.cpp)
    121126add_executable(u8u16 u8u16.cpp)
    122127add_executable(u32u8 u32u8.cpp kernels/pdep_kernel.cpp)
    123128add_executable(base64 base64.cpp kernels/radix64.cpp)
    124 add_executable(wc wc.cpp util/file_select.cpp)
     129add_executable(wc wc.cpp)
    125130add_executable(editd editd/editd.cpp editd/pattern_compiler.cpp editd/editdscan_kernel.cpp editd/editd_gpu_kernel.cpp editd/editd_cpu_kernel.cpp kernels/streams_merge.cpp kernels/cc_kernel.cpp)
    126 add_executable(lz4d lz4d.cpp lz4/lz4_frame_decoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_index_decoder.cpp kernels/lz4/lz4_bytestream_decoder.cpp)
     131# add_executable(lz4d lz4d.cpp lz4/lz4_frame_decoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_index_decoder.cpp kernels/lz4/lz4_bytestream_decoder.cpp)
    127132add_executable(core combine/core.cpp combine/regexGen.cpp combine/stringGen.cpp combine/propGen.cpp combine/icgrep-test/icgrep-test.cpp grep_interface.cpp grep/grep_engine.cpp kernels/scanmatchgen.cpp kernels/u8u32_kernel.cpp kernels/delmask_kernel.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/grep_kernel.cpp kernels/until_n.cpp)
    128 add_executable(character_deletion character_deletion.cpp kernels/cc_kernel.cpp)
    129 add_executable(character_deposit character_deposit.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp kernels/bitstream_pdep_kernel.cpp)
     133# add_executable(character_deletion character_deletion.cpp kernels/cc_kernel.cpp)
     134# add_executable(character_deposit character_deposit.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp kernels/bitstream_pdep_kernel.cpp)
    130135add_executable(lz4_decoder lz4_decoder.cpp)
    131 add_executable(lz4_grep grep_interface.cpp util/file_select.cpp lz4_grep.cpp lz4/grep/lz4_grep_base_generator.cpp lz4/grep/lz4_grep_bytestream_generator.cpp lz4/grep/lz4_grep_bytestream_generator.h lz4/grep/lz4_grep_bitstream_generator.cpp lz4/grep/lz4_grep_bitstream_generator.h lz4/grep/lz4_grep_swizzle_generator.cpp lz4/grep/lz4_grep_swizzle_generator.h)
     136add_executable(lz4_grep grep_interface.cpp lz4_grep.cpp lz4/grep/lz4_grep_base_generator.cpp lz4/grep/lz4_grep_bytestream_generator.cpp lz4/grep/lz4_grep_bytestream_generator.h lz4/grep/lz4_grep_bitstream_generator.cpp lz4/grep/lz4_grep_bitstream_generator.h lz4/grep/lz4_grep_swizzle_generator.cpp lz4/grep/lz4_grep_swizzle_generator.h)
    132137add_executable(idisa_test idisa_test.cpp)
    133138
     
    136141set_target_properties(core PROPERTIES EXCLUDE_FROM_ALL 1)
    137142
    138 target_link_libraries (icgrep GrepEngine UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     143target_link_libraries (icgrep GrepEngine UCDlib PabloADT RegExpCompiler CCADT CodeGen FileSelect ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    139144target_link_libraries (u8u16 PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    140145target_link_libraries (u32u8 PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    141146target_link_libraries (base64 PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    142 target_link_libraries (wc GrepEngine UCDlib PabloADT RegExpCompiler  CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     147target_link_libraries (wc UCDlib PabloADT RegExpCompiler CCADT CodeGen FileSelect ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    143148target_link_libraries (editd PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    144 target_link_libraries (lz4d PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     149# target_link_libraries (lz4d PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    145150target_link_libraries (core RegExpCompiler ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES})
    146 target_link_libraries (character_deletion PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    147 target_link_libraries (character_deposit PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     151# target_link_libraries (character_deletion PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     152# target_link_libraries (character_deposit PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    148153target_link_libraries (lz4_decoder LZ4_Lib PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    149 target_link_libraries (lz4_grep LZ4_Lib UCDlib PabloADT RegExpCompiler CodeGen CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     154target_link_libraries (lz4_grep LZ4_Lib UCDlib PabloADT RegExpCompiler CodeGen CCADT CodeGen FileSelect ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    150155target_link_libraries (idisa_test CodeGen PabloADT ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    151156
     
    238243SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} -O3 -DNDEBUG")
    239244
    240 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} -O1 -g -fno-omit-frame-pointer -fno-optimize-sibling-calls")
     245# -no-pie disables the generation of position-independent executables, which is a default security feature of newer compilers
     246# that prevents addr2line from being able to resolve which line corresponds to the code at run time. This in turn prevents
     247# CreateAssert from being able to provide the compilation call stack for each JIT'ed assertion error.
     248
     249SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} -O1 -g -fno-omit-frame-pointer -fno-optimize-sibling-calls -no-pie")
    241250
    242251UNSET(HAS_ADDRESS_SANITIZER)
     
    266275  NAME u8u16_test
    267276  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/../QA/u8u16
    268   COMMAND ./run_all "${CMAKE_BINARY_DIR}/u8u16 -segment-size=16 -enable-segment-pipeline-parallel")
     277  COMMAND ./run_all "${CMAKE_BINARY_DIR}/u8u16 -enable-segment-pipeline-parallel")
    269278
    270279add_test(
     
    327336add_custom_target (u8u16_test
    328337  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/../QA/u8u16
    329   COMMAND ./run_all "${CMAKE_BINARY_DIR}/u8u16 -segment-size=16 -enable-segment-pipeline-parallel")
     338  COMMAND ./run_all "${CMAKE_BINARY_DIR}/u8u16 -enable-segment-pipeline-parallel")
    330339
    331340add_custom_target (u32u8_test
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r6121 r6184  
    1212#include <llvm/IR/TypeBuilder.h>
    1313#include <llvm/IR/MDBuilder.h>
     14#include <llvm/ADT/DenseSet.h>
    1415#include <llvm/Support/raw_ostream.h>
    1516#include <llvm/Support/Format.h>
    1617#include <toolchain/toolchain.h>
    1718#include <toolchain/driver.h>
    18 #include <set>
    19 #include <thread>
     19//#include <thread>
    2020#include <stdlib.h>
    2121#include <sys/mman.h>
     
    2323#include <stdio.h>
    2424#include <boost/format.hpp>
     25#include <boost/interprocess/mapped_region.hpp>
    2526
    2627#if defined(__i386__)
     
    4748#endif
    4849
    49 
    5050using namespace llvm;
    5151
     
    122122
    123123Value * CBuilder::CreateURem(Value * const number, Value * const divisor, const Twine & Name) {
    124     if (ConstantInt * c = dyn_cast<ConstantInt>(divisor)) {
     124    if (ConstantInt * const c = dyn_cast<ConstantInt>(divisor)) {
    125125        const auto d = c->getZExtValue();
    126126        assert ("CreateURem divisor cannot be 0!" && d);
    127127        if (is_power_2(d)) {
    128             return CreateAnd(number, ConstantInt::get(divisor->getType(), d - 1), Name);
     128            if (LLVM_UNLIKELY(d == 1)) {
     129                return ConstantInt::getNullValue(number->getType());
     130            } else {
     131                return CreateAnd(number, ConstantInt::get(number->getType(), d - 1), Name);
     132            }
    129133        }
    130134    }
     
    152156    assert (number->getType() == divisor->getType());
    153157    Type * const t = number->getType();
    154     Value * const n = CreateAdd(number, CreateSub(divisor, ConstantInt::get(t, 1)));
    155     if (isa<ConstantInt>(divisor)) {
    156         const auto d = cast<ConstantInt>(divisor)->getZExtValue();
    157         if (is_power_2(d)) {
    158             if (d > 1) {
    159                 return CreateLShr(n, ConstantInt::get(t, std::log2(d)), Name);
    160             } else {
    161                 return number;
    162             }
    163         }
    164     }
    165     CreateAssert(divisor, "CreateCeilUDiv divisor cannot be 0!");
    166     return CreateUDiv(n, divisor, Name);
     158    Constant * const ONE = ConstantInt::get(t, 1);
     159    // avoid overflow with x+y-1
     160    return CreateAdd(CreateUDiv(CreateSub(number, ONE), divisor), ONE, Name);
    167161}
    168162
     
    297291}
    298292
    299 void CBuilder::CallPrintIntCond(const std::string & name, llvm::Value * const value, llvm::Value * const cond) {
    300     BasicBlock* callBlock = this->CreateBasicBlock("callBlock");
    301     BasicBlock* exitBlock = this->CreateBasicBlock("exitBlock");
    302     this->CreateCondBr(cond, callBlock, exitBlock);
    303 
    304     this->SetInsertPoint(callBlock);
    305     this->CallPrintInt(name, value);
    306 
    307     this->CreateBr(exitBlock);
    308     this->SetInsertPoint(exitBlock);
    309 }
    310 
    311 void CBuilder::CallPrintInt(const std::string & name, Value * const value) {
    312     Module * const m = getModule();
    313     Constant * printRegister = m->getFunction("PrintInt");
    314     IntegerType * int64Ty = getInt64Ty();
     293void CBuilder::CallPrintIntCond(const std::string & name, Value * const value, Value * const cond, const STD_FD fd) {
     294    BasicBlock * const insertBefore = GetInsertBlock()->getNextNode();
     295    BasicBlock* const callBlock = CreateBasicBlock("callBlock", insertBefore);
     296    BasicBlock* const exitBlock = CreateBasicBlock("exitBlock", insertBefore);
     297    CreateCondBr(cond, callBlock, exitBlock);
     298    SetInsertPoint(callBlock);
     299    CallPrintInt(name, value, fd);
     300    CreateBr(exitBlock);
     301    SetInsertPoint(exitBlock);
     302}
     303
     304void CBuilder::CallPrintInt(const std::string & name, Value * const value, const STD_FD fd) {
     305    Module * const m = getModule();
     306    Constant * printRegister = m->getFunction("print_int");
    315307    if (LLVM_UNLIKELY(printRegister == nullptr)) {
    316         FunctionType *FT = FunctionType::get(getVoidTy(), { getInt8PtrTy(), int64Ty }, false);
    317         Function * function = Function::Create(FT, Function::InternalLinkage, "PrintInt", m);
     308        FunctionType *FT = FunctionType::get(getVoidTy(), { getInt32Ty(), PointerType::get(getInt8Ty(), 0), getSizeTy() }, false);
     309        Function * function = Function::Create(FT, Function::InternalLinkage, "print_int", m);
    318310        auto arg = function->arg_begin();
    319         std::string out = "%-40s = %" PRIx64 "\n";
     311        const char * out = "%-40s = %" PRIx64 "\n";
    320312        BasicBlock * entry = BasicBlock::Create(getContext(), "entry", function);
    321313        IRBuilder<> builder(entry);
    322314        std::vector<Value *> args;
    323         args.push_back(GetString(out.c_str()));
    324         Value * const name = &*(arg++);
    325         name->setName("name");
    326         args.push_back(name);
    327         Value * value = &*arg;
    328         value->setName("value");
    329         args.push_back(value);
    330         builder.CreateCall(GetPrintf(), args);
    331         builder.CreateRetVoid();
    332 
    333         printRegister = function;
    334     }
    335     Value * num = nullptr;
    336     if (value->getType()->isPointerTy()) {
    337         num = CreatePtrToInt(value, int64Ty);
    338     } else {
    339         num = CreateZExtOrBitCast(value, int64Ty);
    340     }
    341     assert (num->getType()->isIntegerTy());
    342     CreateCall(printRegister, {GetString(name.c_str()), num});
    343 }
    344 
    345 void CBuilder::CallPrintIntToStderr(const std::string & name, Value * const value) {
    346     Module * const m = getModule();
    347     Constant * printRegister = m->getFunction("PrintIntToStderr");
    348     if (LLVM_UNLIKELY(printRegister == nullptr)) {
    349         FunctionType *FT = FunctionType::get(getVoidTy(), { PointerType::get(getInt8Ty(), 0), getSizeTy() }, false);
    350         Function * function = Function::Create(FT, Function::InternalLinkage, "PrintIntToStderr", m);
    351         auto arg = function->arg_begin();
    352         std::string out = "%-40s = %" PRIx64 "\n";
    353         BasicBlock * entry = BasicBlock::Create(getContext(), "entry", function);
    354         IRBuilder<> builder(entry);
    355         std::vector<Value *> args;
    356         args.push_back(getInt32(STDERR_FILENO));
    357         args.push_back(GetString(out.c_str()));
     315        Value * const fdInt = &*(arg++);
     316        args.push_back(fdInt);
     317        args.push_back(GetString(out));
    358318        Value * const name = &*(arg++);
    359319        name->setName("name");
     
    364324        builder.CreateCall(GetDprintf(), args);
    365325        builder.CreateRetVoid();
    366 
    367326        printRegister = function;
    368327    }
     
    374333    }
    375334    assert (num->getType()->isIntegerTy());
    376     CreateCall(printRegister, {GetString(name.c_str()), num});
    377 }
    378 
    379 void CBuilder::CallPrintMsgToStderr(const std::string & message) {
    380     Module * const m = getModule();
    381     Constant * printMsg = m->getFunction("PrintMsgToStderr");
    382     if (LLVM_UNLIKELY(printMsg == nullptr)) {
    383         FunctionType *FT = FunctionType::get(getVoidTy(), { PointerType::get(getInt8Ty(), 0) }, false);
    384         Function * function = Function::Create(FT, Function::InternalLinkage, "PrintMsgToStderr", m);
    385         auto arg = function->arg_begin();
    386         std::string out = "%s\n";
    387         BasicBlock * entry = BasicBlock::Create(getContext(), "entry", function);
    388         IRBuilder<> builder(entry);
    389         std::vector<Value *> args;
    390         args.push_back(getInt32(STDERR_FILENO));
    391         args.push_back(GetString(out));
    392         Value * const msg = &*(arg++);
    393         msg->setName("msg");
    394         args.push_back(msg);
    395         builder.CreateCall(GetDprintf(), args);
    396         builder.CreateRetVoid();
    397 
    398         printMsg = function;
    399     }
    400     CreateCall(printMsg, {GetString(message.c_str())});
     335    CreateCall(printRegister, {getInt32(static_cast<uint32_t>(fd)), GetString(name.c_str()), num});
    401336}
    402337
     
    415350    CallInst * const ptr = CreateCall(f, size);
    416351    CreateAssert(ptr, "CreateMalloc: returned null pointer");
     352    CreateMemZero(ptr, size, 1);
    417353    return ptr;
     354}
     355
     356Value * CBuilder::CreateCacheAlignedMalloc(Type * const type, Value * const ArraySize, const unsigned addressSpace) {
     357    Value * size = ConstantExpr::getSizeOf(type);
     358    if (ArraySize) {
     359        size = CreateMul(size, CreateZExtOrTrunc(ArraySize, size->getType()));
     360    }
     361    return CreatePointerCast(CreateCacheAlignedMalloc(size), type->getPointerTo(addressSpace));
    418362}
    419363
     
    426370    PointerType * const voidPtrTy = getVoidPtrTy();
    427371    ConstantInt * const align = ConstantInt::get(sizeTy, alignment);
    428     ConstantInt * const alignMask = ConstantInt::get(sizeTy, alignment - 1);
    429372    size = CreateZExtOrTrunc(size, sizeTy);
    430     Value * const offset = CreateAnd(size, alignMask);
    431     size = CreateSelect(CreateIsNull(offset), size, CreateAdd(size, CreateXor(offset, alignMask)));
    432     CreateAssertZero(CreateURem(size, align), "CreateAlignedMalloc: size must be an integral multiple of alignment.");
     373    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     374        CreateAssertZero(CreateURem(size, align), "CreateAlignedMalloc: size must be an integral multiple of alignment.");
     375    }
    433376    Value * ptr = nullptr;
    434     if (hasPosixMemalign()) {
    435         Function * f = m->getFunction("posix_memalign");
    436         if (LLVM_UNLIKELY(f == nullptr)) {
    437             FunctionType * const fty = FunctionType::get(getInt32Ty(), {voidPtrTy->getPointerTo(), sizeTy, sizeTy}, false);
    438             f = Function::Create(fty, Function::ExternalLinkage, "posix_memalign", m);
    439             f->setCallingConv(CallingConv::C);
    440         }
    441         Value * handle = CreateAlloca(voidPtrTy);
    442         CallInst * success = CreateCall(f, {handle, align, size});
    443         if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    444             CreateAssertZero(success, "CreateAlignedMalloc: posix_memalign reported bad allocation");
    445         }
    446         ptr = CreateLoad(handle);
    447     } else if (hasAlignedAlloc()) {
     377    if (hasAlignedAlloc()) {
    448378        Function * f = m->getFunction("aligned_alloc");
    449379        if (LLVM_UNLIKELY(f == nullptr)) {
     
    454384        }
    455385        ptr = CreateCall(f, {align, size});
     386    } else if (hasPosixMemalign()) {
     387        Function * f = m->getFunction("posix_memalign");
     388        if (LLVM_UNLIKELY(f == nullptr)) {
     389            FunctionType * const fty = FunctionType::get(getInt32Ty(), {voidPtrTy->getPointerTo(), sizeTy, sizeTy}, false);
     390            f = Function::Create(fty, Function::ExternalLinkage, "posix_memalign", m);
     391            f->setCallingConv(CallingConv::C);
     392        }
     393        Value * handle = CreateAlloca(voidPtrTy);
     394        CallInst * success = CreateCall(f, {handle, align, size});
     395        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
     396            CreateAssertZero(success, "CreateAlignedMalloc: posix_memalign reported bad allocation");
     397        }
     398        ptr = CreateLoad(handle);
    456399    } else {
    457400        report_fatal_error("stdlib.h does not contain either aligned_alloc or posix_memalign");
    458401    }
    459     CreateAssert(ptr, "CreateAlignedMalloc: returned null pointer.");
     402    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     403        CreateAssert(ptr, "CreateAlignedMalloc: returned null (out of memory?)");
     404    }
     405    CreateMemZero(ptr, size, alignment);
    460406    return ptr;
    461407}
     
    632578    } else { // no OS mremap support
    633579        ptr = CreateAnonymousMMap(newSize);
    634         CreateMemCpy(ptr, addr, oldSize, getpagesize());
     580        CreateMemCpy(ptr, addr, oldSize, getPageSize());
    635581        CreateMUnmap(addr, oldSize);
    636582    }
     
    653599        CreateAssert(len, "CreateMUnmap: length cannot be 0");
    654600        Value * const addrValue = CreatePtrToInt(addr, intPtrTy);
    655         Value * const pageOffset = CreateURem(addrValue, ConstantInt::get(intPtrTy, getpagesize()));
     601        Value * const pageOffset = CreateURem(addrValue, ConstantInt::get(intPtrTy, getPageSize()));
    656602        CreateAssertZero(pageOffset, "CreateMUnmap: addr must be a multiple of the page size");
    657603        Value * const boundCheck = CreateICmpULT(addrValue, CreateSub(ConstantInt::getAllOnesValue(intPtrTy), CreateZExtOrTrunc(len, intPtrTy)));
     
    662608}
    663609
    664 Value * CBuilder::CreateMProtect(Value * addr, Value * size, const int protect) {
     610Value * CBuilder::CreateMProtect(Value * addr, Value * size, const Protect protect) {
    665611    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    666612        // mprotect() changes the access protections for the calling process's
     
    675621        // in a process's address space (except for the kernel vsyscall area).
    676622        // In particular, it can be used to change existing code mappings to be
    677         // writable.
    678 
    679 //        Triple T(mTriple);
    680 //        if (!T.isOSLinux()) {
    681 //            DataLayout DL(getModule());
    682 //            IntegerType * const intPtrTy = getIntPtrTy(DL);
    683 //            Value * a = CreatePtrToInt(addr, intPtrTy);
    684 //            Constant * const pageSize = ConstantInt::get(intPtrTy, getpagesize());
    685 //            CreateAssertZero(CreateURem(a, pageSize), "CreateMProtect: addr must be aligned to page boundary on non-Linux architectures");
    686 //        }
     623        // writable. (NOTE: does not appear to be true on UBUNTU 16.04, 16.10 or 18.04)
     624
     625        DataLayout DL(getModule());
     626        IntegerType * const intPtrTy = getIntPtrTy(DL);
     627        Constant * const pageSize = ConstantInt::get(intPtrTy, getPageSize());
     628        CreateAssertZero(CreateURem(CreatePtrToInt(addr, intPtrTy), pageSize), "CreateMProtect: addr must be aligned to page boundary");
    687629    }
    688630
     
    699641    addr = CreatePointerCast(addr, voidPtrTy);
    700642    size = CreateZExtOrTrunc(size, sizeTy);
    701     return CreateCall(mprotectFunc, {addr, size, ConstantInt::get(int32Ty, (int)protect)});
    702 
     643    Value * const result = CreateCall(mprotectFunc, {addr, size, ConstantInt::get(int32Ty, (int)protect)});
     644    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     645        CreateAssertZero(result, "CreateMProtect: could not change the permission of the given address range");
     646    }
     647    return result;
    703648}
    704649
     
    823768        pthreadCreateFunc->setCallingConv(CallingConv::C);
    824769    }
     770    assert (thread->getType()->isPointerTy());
    825771    return CreateCall(pthreadCreateFunc, {thread, attr, start_routine, CreatePointerCast(arg, voidPtrTy)});
    826772}
     
    895841    }
    896842    out.changeColor(raw_fd_ostream::WHITE, true);
    897     out << msg << "\n";
     843    out << msg << "\n";   
     844    if (trace == nullptr) {
     845        out.changeColor(raw_fd_ostream::WHITE, true);
     846        out << "No debug symbols loaded.\n";
     847    }
    898848    out.resetColor();
    899849    out.flush();
     
    1017967            function = Function::Create(fty, Function::PrivateLinkage, "assert", m);
    1018968            function->setDoesNotThrow();
    1019 #if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(5, 0, 0)
     969            #if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(5, 0, 0)
    1020970            function->setDoesNotAlias(2);
    1021 #endif
     971            #endif
    1022972            BasicBlock * const entry = BasicBlock::Create(getContext(), "", function);
    1023973            BasicBlock * const failure = BasicBlock::Create(getContext(), "", function);
     
    1042992            restoreIP(ip);
    1043993        }
    1044 
     994        #ifndef NDEBUG
    1045995        SmallVector<unw_word_t, 64> stack;
    1046996        #if defined(HAS_MACH_VM_TYPES)
     
    10821032        }
    10831033        #endif
     1034        // TODO: look into how to safely use __builtin_return_address(0)?
     1035
     1036
     1037        const unsigned FIRST_NON_ASSERT = 2;
    10841038        Value * trace = nullptr;
    10851039        ConstantInt * depth = nullptr;
    1086         if (stack.empty()) {
     1040        if (LLVM_UNLIKELY(stack.size() < FIRST_NON_ASSERT)) {
    10871041            trace = ConstantPointerNull::get(stackPtrTy);
    10881042            depth = getInt32(0);
    10891043        } else {
    1090             const auto n = stack.size() - 1;
     1044            const auto n = stack.size() - FIRST_NON_ASSERT;
    10911045            for (GlobalVariable & gv : m->getGlobalList()) {
    10921046                Type * const ty = gv.getValueType();
     
    10951049                    bool found = true;
    10961050                    for (size_t i = 0; i < n; ++i) {
    1097                         if (LLVM_LIKELY(array->getElementAsInteger(i) != stack[i + 1])) {
     1051                        if (LLVM_LIKELY(array->getElementAsInteger(i) != stack[i + FIRST_NON_ASSERT])) {
    10981052                            found = false;
    10991053                            break;
     
    11071061            }
    11081062            if (LLVM_LIKELY(trace == nullptr)) {
    1109                 Constant * const initializer = ConstantDataArray::get(getContext(), ArrayRef<unw_word_t>(stack.data() + 1, n));
     1063                Constant * const initializer = ConstantDataArray::get(getContext(), ArrayRef<unw_word_t>(stack.data() + FIRST_NON_ASSERT, n));
    11101064                trace = new GlobalVariable(*m, initializer->getType(), true, GlobalVariable::InternalLinkage, initializer);
    11111065            }
    11121066            trace = CreatePointerCast(trace, stackPtrTy);
    11131067            depth = getInt32(n);
    1114         }       
     1068        }
     1069        #else
     1070        Value * trace = ConstantPointerNull::get(stackPtrTy);
     1071        Value * depth = getInt32(0);
     1072        #endif
    11151073        SmallVector<char, 1024> tmp;
    11161074        IRBuilder<>::CreateCall(function, {assertion, GetString(failureMessage.toStringRef(tmp)), trace, depth});
     
    11181076
    11191077        // INVESTIGATE: while interesting, this does not seem to produce faster code and only provides a trivial
    1120         // reduction of compiled code size in LLVM 3.8 but nearly doubles compilation time. This may have been
     1078        // reduction of compiled code size in LLVM 3.8 but nearly doubles JIT compilation time. This may have been
    11211079        // improved with later versions of LLVM but it's likely that assumptions ought to be hand placed once
    11221080        // they're proven to improve performance.
     
    12471205    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    12481206        CHECK_ADDRESS(Ptr, ConstantExpr::getSizeOf(Ptr->getType()->getPointerElementType()), "CreateLoad");
    1249     }
     1207    }   
    12501208    return IRBuilder<>::CreateLoad(Ptr, isVolatile, Name);
    12511209}
     
    13111269}
    13121270
    1313 Value * CBuilder::CreateMemChr(llvm::Value * ptr, llvm::Value * byteVal, llvm::Value * num) {
     1271Value * CBuilder::CreateMemChr(Value * ptr, Value * byteVal, Value * num) {
    13141272    Module * const m = getModule();
    13151273    Function * memchrFn = m->getFunction("memchr");
     
    13841342}
    13851343
     1344CallInst * CBuilder::CreateMemCmp(Value * Ptr1, Value * Ptr2, Value * Num) { // Emits a call to the external "memcmp" function on (Ptr1, Ptr2, Num) and returns the call instruction.
     1345    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { // only when the EnableAsserts debug option is set
     1346        CHECK_ADDRESS(Ptr1, Num, "CreateMemCmp: Ptr1");
     1347        CHECK_ADDRESS(Ptr2, Num, "CreateMemCmp: Ptr2");
     1348    }
     1349    Module * const m = getModule();
     1350    Function * f = m->getFunction("memcmp"); // reuse an existing declaration if the module already has one
     1351    PointerType * const voidPtrTy = getVoidPtrTy();
     1352    IntegerType * const sizeTy = getSizeTy();
     1353    if (f == nullptr) { // first use in this module: declare i32 memcmp(void*, void*, size_t)
     1354        FunctionType * const fty = FunctionType::get(getInt32Ty(), {voidPtrTy, voidPtrTy, sizeTy}, false);
     1355        f = Function::Create(fty, Function::ExternalLinkage, "memcmp", m);
     1356        f->setCallingConv(CallingConv::C);
     1357        #if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(5, 0, 0)
     1358        f->setDoesNotAlias(1); // NOTE(review): presumably marks the first pointer parameter noalias; only compiled for LLVM < 5.0 -- confirm
     1359        f->setDoesNotAlias(2); // NOTE(review): likewise for the second pointer parameter -- confirm
     1360        #endif
     1361    }
     1362    Ptr1 = CreatePointerCast(Ptr1, voidPtrTy); // coerce the arguments to the declared parameter types
     1363    Ptr2 = CreatePointerCast(Ptr2, voidPtrTy);
     1364    Num = CreateZExtOrTrunc(Num, sizeTy);
     1365    return CreateCall(f, {Ptr1, Ptr2, Num});
     1366}
     1367
     1368Value * CBuilder::CreateExtractElement(Value * Vec, Value *Idx, const Twine & Name) { // Forwards to IRBuilder<>::CreateExtractElement, adding validation when EnableAsserts is set.
     1369    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     1370        if (LLVM_UNLIKELY(!Vec->getType()->isVectorTy())) { // checked while generating code, not at runtime
     1371            report_fatal_error("CreateExtractElement: Vec argument is not a vector type");
     1372        }
     1373        Constant * const Size = ConstantInt::get(Idx->getType(), Vec->getType()->getVectorNumElements());
     1374        // extracting an element from a position that exceeds the length of the vector is undefined
     1375        CreateAssert(CreateICmpULT(Idx, Size), "CreateExtractElement: Idx is greater than Vec size"); // emitted check: Idx < element count (unsigned)
     1376    }
     1377    return IRBuilder<>::CreateExtractElement(Vec, Idx, Name);
     1378}
     1379
     1380Value * CBuilder::CreateInsertElement(Value * Vec, Value * NewElt, Value * Idx, const Twine & Name) {
     1381    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     1382        if (LLVM_UNLIKELY(!Vec->getType()->isVectorTy())) {
     1383            report_fatal_error("CreateExtractElement: Vec argument is not a vector type");
     1384        }
     1385        Constant * const Size = ConstantInt::get(Idx->getType(), Vec->getType()->getVectorNumElements());
     1386        // inserting an element into a position that exceeds the length of the vector is undefined
     1387        CreateAssert(CreateICmpULT(Idx, Size), "CreateInsertElement: Idx is greater than Vec size");
     1388    }
     1389    return IRBuilder<>::CreateInsertElement(Vec, NewElt, Idx, Name);
     1390}
     1391
    13861392CallInst * CBuilder::CreateSRandCall(Value * randomSeed) {
    13871393    Module * const m = getModule();
     
    14061412}
    14071413
     1414unsigned CBuilder::getPageSize() { // Returns the page size reported by boost::interprocess::mapped_region.
     1415    return boost::interprocess::mapped_region::get_page_size();
     1416}
    14081417
    14091418
     
    14161425
    14171426}
     1427
     1428struct RemoveRedundantAssertionsPass : public llvm::ModulePass { // Module pass that erases calls to "assert" that are constant-true or repeated within a basic block.
     1429    static char ID; // pass identifier handed to the ModulePass constructor
     1430    RemoveRedundantAssertionsPass() : ModulePass(ID) { }
     1431
     1432    virtual bool runOnModule(llvm::Module &M) override; // returns true if at least one call was erased
     1433};
     1434
     1435llvm::ModulePass * createRemoveRedundantAssertionsPass() { // Factory: returns a newly allocated pass instance.
     1436    return new RemoveRedundantAssertionsPass();
     1437}
     1438
     1439char RemoveRedundantAssertionsPass::ID = 0;
     1440
     1441bool RemoveRedundantAssertionsPass::runOnModule(Module & M) { // Erases redundant calls to the module's "assert" function; returns whether anything was removed.
     1442    Function * const assertFunc = M.getFunction("assert");
     1443    if (LLVM_UNLIKELY(assertFunc == nullptr)) { // module has no "assert" function: nothing to do
     1444        return false;
     1445    }
     1446    bool modified = false;
     1447    DenseSet<Value *> S; // conditions already asserted in the current basic block
     1448    for (auto & F : M) {
     1449        for (auto & B : F) {
     1450            S.clear(); // duplicates are only tracked within a single basic block
     1451            for (BasicBlock::iterator i = B.begin(); i != B.end(); ) { // iterator is advanced manually because calls may be erased
     1452                Instruction & inst = *i;
     1453                if (LLVM_UNLIKELY(isa<CallInst>(inst))) {
     1454                    CallInst & ci = cast<CallInst>(inst);
     1455                    if (ci.getCalledFunction() == assertFunc) {
     1456                        bool remove = false;
     1457                        Value * const check = ci.getOperand(0); // operand 0 is treated as the asserted condition
     1458                        if (LLVM_UNLIKELY(isa<Constant>(check))) {
     1459                            if (LLVM_LIKELY(cast<Constant>(check)->isOneValue())) { // constant-true condition
     1460                                remove = true;
     1461                            } else {
     1462                                // TODO: show all static failures with their compilation context
     1463                            }
     1464                        } else if (LLVM_UNLIKELY(S.count(check))) { // same condition already asserted earlier in this basic block
     1465                            remove = true;
     1466                        } else {
     1467                            S.insert(check);
     1468                        }
     1469                        if (LLVM_UNLIKELY(remove)) {
     1470                            i = ci.eraseFromParent(); // continue from the iterator returned by the erase
     1471                            modified = true;
     1472                            continue;
     1473                        }
     1474                    }
     1475                }
     1476                ++i;
     1477            }
     1478        }
     1479    }
     1480    return modified;
     1481}
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r6120 r6184  
    1313#include <llvm/IR/Function.h>
    1414#endif
     15#include <unistd.h>
    1516
    1617namespace kernels { class KernelBuilder; }
     
    2223namespace llvm { class Value; }
    2324
    24 class Driver;
     25class BaseDriver;
    2526
    2627class CBuilder : public llvm::IRBuilder<> {
    27     using Predicate = llvm::CmpInst::Predicate;
    2828public:
    2929
     
    6565           
    6666    // Get minimum of two unsigned numbers
    67     llvm::Value * CreateUMin(llvm::Value * const a, llvm::Value * const b) {
    68         if (a == nullptr) return b;
    69         if (b == nullptr) return a;
     67    llvm::Value * CreateUMin(llvm::Value * const a, llvm::Value * const b, const llvm::Twine &Name = "") {
     68        if (LLVM_UNLIKELY(a == nullptr || a == b)) return b;
     69        if (LLVM_UNLIKELY(b == nullptr)) return a;
    7070        assert (a->getType() == b->getType());
    71         return CreateSelect(CreateICmpULT(a, b), a, b);
     71        return CreateSelect(CreateICmpULT(a, b), a, b, Name);
    7272    }
    7373
    7474    // Get minimum of two signed numbers
    75     llvm::Value * CreateSMin(llvm::Value * const a, llvm::Value * const b) {
    76         if (a == nullptr) return b;
    77         if (b == nullptr) return a;
     75    llvm::Value * CreateSMin(llvm::Value * const a, llvm::Value * const b, const llvm::Twine &Name = "") {
     76        if (LLVM_UNLIKELY(a == nullptr || a == b)) return b;
     77        if (LLVM_UNLIKELY(b == nullptr)) return a;
    7878        assert (a->getType() == b->getType());
    79         return CreateSelect(CreateICmpSLT(a, b), a, b);
     79        return CreateSelect(CreateICmpSLT(a, b), a, b, Name);
    8080    }
    8181
    8282    // Get maximum of two unsigned numbers
    83     llvm::Value * CreateUMax(llvm::Value * const a, llvm::Value * const b) {
    84         if (a == nullptr) return b;
    85         if (b == nullptr) return a;
     83    llvm::Value * CreateUMax(llvm::Value * const a, llvm::Value * const b, const llvm::Twine &Name = "") {
     84        if (LLVM_UNLIKELY(a == nullptr || a == b)) return b;
     85        if (LLVM_UNLIKELY(b == nullptr)) return a;
    8686        assert (a->getType() == b->getType());
    87         return CreateSelect(CreateICmpUGT(a, b), a, b);
     87        return CreateSelect(CreateICmpUGT(a, b), a, b, Name);
    8888    }
    8989
    9090    // Get maximum of two signed numbers
    91     llvm::Value * CreateSMax(llvm::Value * const a, llvm::Value * const b) {
    92         if (a == nullptr) return b;
    93         if (b == nullptr) return a;
     91    llvm::Value * CreateSMax(llvm::Value * const a, llvm::Value * const b, const llvm::Twine &Name = "") {
     92        if (LLVM_UNLIKELY(a == nullptr || a == b)) return b;
     93        if (LLVM_UNLIKELY(b == nullptr)) return a;
    9494        assert (a->getType() == b->getType());
    95         return CreateSelect(CreateICmpSGT(a, b), a, b);
     95        return CreateSelect(CreateICmpSGT(a, b), a, b, Name);
    9696    }
    9797
     
    103103        return CreateAlignedMalloc(size, getCacheAlignment());
    104104    }
    105    
     105
     106    llvm::Value * CreateCacheAlignedMalloc(llvm::Type * const type, llvm::Value * const ArraySize = nullptr, const unsigned addressSpace = 0);
     107
    106108    void CreateFree(llvm::Value * const ptr);
    107109
     
    113115
    114116    llvm::Value * CreateMemChr(llvm::Value * ptr, llvm::Value * byteVal, llvm::Value * num);
     117
     118    llvm::CallInst * CreateMemCmp(llvm::Value * ptr1, llvm::Value * ptr2, llvm::Value * num);
    115119   
    116120    llvm::AllocaInst * CreateAlignedAlloca(llvm::Type * const Ty, const unsigned alignment, llvm::Value * const ArraySize = nullptr) {
    117         llvm::AllocaInst * instr = CreateAlloca(Ty, ArraySize);
    118         instr->setAlignment(alignment);
    119         return instr;
     121        llvm::AllocaInst * const alloc = CreateAlloca(Ty, ArraySize);
     122        alloc->setAlignment(alignment);
     123        return alloc;
    120124    }
    121125
     
    194198    };
    195199
    196     llvm::Value * CreateMProtect(llvm::Value * addr, llvm::Value * size, int protect);
     200    llvm::Value * CreateMProtect(llvm::Value * addr, const Protect protect) {
     201        return CreateMProtect(addr, llvm::ConstantExpr::getSizeOf(addr->getType()->getPointerElementType()), protect);
     202    }
     203
     204    llvm::Value * CreateMProtect(llvm::Value * addr, llvm::Value * size, const Protect protect);
    197205
    198206    //  Posix thread (pthread.h) functions.
     
    211219    llvm::Value * CreatePThreadJoinCall(llvm::Value * thread, llvm::Value * value_ptr);
    212220
    213     void CallPrintIntCond(const std::string & name, llvm::Value * const value, llvm::Value * const cond);
    214 
    215     void CallPrintInt(const std::string & name, llvm::Value * const value);
    216    
    217     void CallPrintIntToStderr(const std::string & name, llvm::Value * const value);
    218    
     221    enum class STD_FD {
     222        STD_IN = STDIN_FILENO
     223        , STD_OUT = STDOUT_FILENO
     224        , STD_ERR = STDERR_FILENO
     225    };
     226
     227    void CallPrintIntCond(const std::string & name, llvm::Value * const value, llvm::Value * const cond, const STD_FD fd = STD_FD::STD_ERR);
     228
     229    void CallPrintInt(const std::string & name, llvm::Value * const value, const STD_FD fd = STD_FD::STD_ERR);
     230       
    219231    llvm::Value * GetString(llvm::StringRef Str);
    220 
    221     void CallPrintMsgToStderr(const std::string & message);
    222232
    223233    inline llvm::IntegerType * getSizeTy() const {
     
    226236    }
    227237   
    228     inline llvm::ConstantInt * getSize(const size_t value) {
     238    inline llvm::ConstantInt * LLVM_READNONE getSize(const size_t value) {
    229239        return llvm::ConstantInt::get(getSizeTy(), value);
    230240    }
     
    239249        return mCacheLineAlignment;
    240250    }
     251
     252    static LLVM_READNONE unsigned getPageSize();
    241253   
    242254    virtual llvm::LoadInst* CreateAtomicLoadAcquire(llvm::Value * ptr);
     
    245257
    246258    void CreateAssert(llvm::Value * assertion, const llvm::Twine & failureMessage) {
    247         if (LLVM_UNLIKELY(assertion->getType()->isVectorTy())) {
    248             assertion = CreateBitCast(assertion, getIntNTy(assertion->getType()->getPrimitiveSizeInBits()));
    249         }
    250259        return __CreateAssert(CreateIsNotNull(assertion), failureMessage);
    251260    }
    252261
    253262    void CreateAssertZero(llvm::Value * assertion, const llvm::Twine & failureMessage) {
    254         if (LLVM_UNLIKELY(assertion->getType()->isVectorTy())) {
    255             assertion = CreateBitCast(assertion, getIntNTy(assertion->getType()->getPrimitiveSizeInBits()));
    256         }
    257263        return __CreateAssert(CreateIsNull(assertion), failureMessage);
    258264    }
     
    292298
    293299    template <typename ExternalFunctionType>
    294     llvm::Function * LinkFunction(llvm::StringRef name, ExternalFunctionType * functionPtr) const;
     300    llvm::Function * LinkFunction(llvm::StringRef name, ExternalFunctionType & functionPtr) const;
     301
     302    llvm::Function * LinkFunction(llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
    295303
    296304    virtual llvm::LoadInst * CreateLoad(llvm::Value * Ptr, const char * Name);
     
    350358                           llvm::MDNode *NoAliasTag = nullptr);
    351359   
     360    llvm::Value * CreateExtractElement(llvm::Value *Vec, llvm::Value *Idx, const llvm::Twine &Name = "");
     361
     362    llvm::Value * CreateExtractElement(llvm::Value *Vec, uint64_t Idx, const llvm::Twine &Name = "") {
     363        return CreateExtractElement(Vec, getInt64(Idx), Name);
     364    }
     365
     366    llvm::Value * CreateInsertElement(llvm::Value *Vec, llvm::Value *NewElt, llvm::Value *Idx, const llvm::Twine &Name = "");
     367
     368    llvm::Value * CreateInsertElement(llvm::Value *Vec, llvm::Value *NewElt, uint64_t Idx, const llvm::Twine &Name = "") {
     369        return CreateInsertElement(Vec, NewElt, getInt64(Idx), Name);
     370    }
     371
    352372    llvm::CallInst * CreateSRandCall(llvm::Value * randomSeed);
    353373    llvm::CallInst * CreateRandCall();
    354374
    355     void setDriver(Driver * const driver) {
     375    void setDriver(BaseDriver * const driver) {
    356376        mDriver = driver;
    357377    }
     
    366386
    367387    void __CreateAssert(llvm::Value * assertion, const llvm::Twine & failureMessage);
    368 
    369     llvm::Function * LinkFunction(llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
    370388
    371389protected:
     
    375393    llvm::IntegerType * const       mSizeType;
    376394    llvm::StructType *              mFILEtype;
    377     Driver *                        mDriver;   
     395    BaseDriver *                        mDriver;
    378396    llvm::LLVMContext               mContext;
    379397    const std::string               mTriple;
     
    381399
    382400template <typename ExternalFunctionType>
    383 llvm::Function *CBuilder::LinkFunction(llvm::StringRef name, ExternalFunctionType * functionPtr) const {
     401llvm::Function * CBuilder::LinkFunction(llvm::StringRef name, ExternalFunctionType & functionPtr) const {
    384402    llvm::FunctionType * const type = FunctionTypeBuilder<ExternalFunctionType>::get(getContext());
    385403    assert ("FunctionTypeBuilder did not resolve a function type." && type);
    386     return LinkFunction(name, type, reinterpret_cast<void *>(functionPtr));
     404    return LinkFunction(name, type, reinterpret_cast<void *>(&functionPtr));
    387405}
    388406
     407llvm::ModulePass * createRemoveRedundantAssertionsPass();
     408
    389409#endif
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6113 r6184  
    326326}
    327327
    328 llvm::Value * IDISA_AVX2_Builder::mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift) {
     328llvm::Value * IDISA_AVX2_Builder::mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift, const bool safe) {
    329329    // Intrinsic::x86_avx2_permd) allows an efficient implementation for field width 32.
    330330    // Translate larger field widths to 32 bits.
    331331    if (fw > 32) {
    332         return fwCast(fw, mvmd_srl(32, a, CreateMul(shift, ConstantInt::get(shift->getType(), fw/32))));
     332        return fwCast(fw, mvmd_srl(32, a, CreateMul(shift, ConstantInt::get(shift->getType(), fw/32)), safe));
    333333    }
    334334    if ((mBitBlockWidth == 256) && (fw == 32)) {
     
    351351        return simd_if(1, simd_eq(fw, shiftSplat, allZeroes()), a, shifted);
    352352    }
    353     return IDISA_Builder::mvmd_srl(fw, a, shift);
    354 }
    355 
    356 llvm::Value * IDISA_AVX2_Builder::mvmd_sll(unsigned fw, llvm::Value * a, llvm::Value * shift) {
     353    return IDISA_Builder::mvmd_srl(fw, a, shift, safe);
     354}
     355
     356llvm::Value * IDISA_AVX2_Builder::mvmd_sll(unsigned fw, llvm::Value * a, llvm::Value * shift, const bool safe) {
    357357    // Intrinsic::x86_avx2_permd) allows an efficient implementation for field width 32.
    358358    // Translate larger field widths to 32 bits.
    359359    if (fw > 32) {
    360         return fwCast(fw, mvmd_sll(32, a, CreateMul(shift, ConstantInt::get(shift->getType(), fw/32))));
     360        return fwCast(fw, mvmd_sll(32, a, CreateMul(shift, ConstantInt::get(shift->getType(), fw/32)), safe));
    361361    }
    362362    if ((mBitBlockWidth == 256) && (fw == 32)) {
     
    379379        return simd_if(1, simd_eq(fw, shiftSplat, allZeroes()), a, shifted);
    380380    }
    381     return IDISA_Builder::mvmd_sll(fw, a, shift);
     381    return IDISA_Builder::mvmd_sll(fw, a, shift, safe);
    382382}
    383383
     
    535535}
    536536
    537 llvm::Value * IDISA_AVX512F_Builder::mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift) {
     537llvm::Value * IDISA_AVX512F_Builder::mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift, const bool safe) {
    538538    const unsigned fieldCount = mBitBlockWidth/fw;
    539539    Type * fieldTy = getIntNTy(fw);
     
    554554        }
    555555    }
    556     return IDISA_Builder::mvmd_srl(fw, a, shift);
     556    return IDISA_Builder::mvmd_srl(fw, a, shift, safe);
    557557}
    558558 
    559 llvm::Value * IDISA_AVX512F_Builder::mvmd_sll(unsigned fw, llvm::Value * a, llvm::Value * shift) {
     559llvm::Value * IDISA_AVX512F_Builder::mvmd_sll(unsigned fw, llvm::Value * a, llvm::Value * shift, const bool safe) {
    560560    const unsigned fieldCount = mBitBlockWidth/fw;
    561561    Type * fieldTy = getIntNTy(fw);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.h

    r6107 r6184  
    5252    std::pair<llvm::Value *, llvm::Value *> bitblock_indexed_advance(llvm::Value * a, llvm::Value * index_strm, llvm::Value * shiftin, unsigned shift) override;
    5353    llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
    54     llvm::Value * mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift) override;
    55     llvm::Value * mvmd_sll(unsigned fw, llvm::Value * a, llvm::Value * shift) override;
     54    llvm::Value * mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift, const bool safe = false) override;
     55    llvm::Value * mvmd_sll(unsigned fw, llvm::Value * a, llvm::Value * shift, const bool safe = false) override;
    5656    llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) override;
    5757    llvm::Value * mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) override;
     
    8787    llvm::Value * mvmd_shuffle2(unsigned fw, llvm::Value * table0, llvm::Value * table1, llvm::Value * index_vector) override;
    8888    llvm::Value * mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) override;
    89     llvm::Value * mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift) override;
    90     llvm::Value * mvmd_sll(unsigned fw, llvm::Value * a, llvm::Value * shift) override;
     89    llvm::Value * mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift, const bool safe) override;
     90    llvm::Value * mvmd_sll(unsigned fw, llvm::Value * a, llvm::Value * shift, const bool safe) override;
    9191    llvm::Value * simd_if(unsigned fw, llvm::Value * cond, llvm::Value * a, llvm::Value * b) override;
    9292
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r6164 r6184  
    3939}
    4040
    41 void IDISA_Builder::CallPrintRegisterCond(const std::string & regName, llvm::Value * const value, llvm::Value * const cond) {
    42     BasicBlock* callBlock = this->CreateBasicBlock("callBlock");
    43     BasicBlock* exitBlock = this->CreateBasicBlock("exitBlock");
    44     this->CreateCondBr(cond, callBlock, exitBlock);
    45 
    46     this->SetInsertPoint(callBlock);
    47     this->CallPrintRegister(regName, value);
    48 
    49     this->CreateBr(exitBlock);
    50     this->SetInsertPoint(exitBlock);
    51 }
    52 
    53 void IDISA_Builder::CallPrintRegister(const std::string & name, Value * const value) {
     41void IDISA_Builder::CallPrintRegisterCond(const std::string & regName, llvm::Value * const value, llvm::Value * const cond, const STD_FD fd) {
     42    BasicBlock * const insertBefore = GetInsertBlock()->getNextNode();
     43    BasicBlock* const callBlock = CreateBasicBlock("callBlock", insertBefore);
     44    BasicBlock* const exitBlock = CreateBasicBlock("exitBlock", insertBefore);
     45    CreateCondBr(cond, callBlock, exitBlock);
     46    CallPrintRegister(regName, value, fd);
     47    CreateBr(exitBlock);
     48    SetInsertPoint(exitBlock);
     49}
     50
     51void IDISA_Builder::CallPrintRegister(const std::string & name, Value * const value, const STD_FD fd) {
    5452    Module * const m = getModule();
    55     Constant * printRegister = m->getFunction("PrintRegister");
     53    Constant * printRegister = m->getFunction("print_register");
    5654    if (LLVM_UNLIKELY(printRegister == nullptr)) {
    57         FunctionType *FT = FunctionType::get(getVoidTy(), { PointerType::get(getInt8Ty(), 0), getBitBlockType() }, false);
    58         Function * function = Function::Create(FT, Function::InternalLinkage, "PrintRegister", m);
     55        FunctionType *FT = FunctionType::get(getVoidTy(), { getInt32Ty(), getInt8PtrTy(0), getBitBlockType() }, false);
     56        Function * function = Function::Create(FT, Function::InternalLinkage, "print_register", m);
    5957        auto arg = function->arg_begin();
    6058        std::string tmp;
    6159        raw_string_ostream out(tmp);
    6260        out << "%-40s =";
    63         for(unsigned i = 0; i < (mBitBlockWidth / 8); ++i) {
     61        for(unsigned i = 0; i < (getBitBlockWidth() / 8); ++i) {
    6462            out << " %02x";
    6563        }
     
    6866        IRBuilder<> builder(entry);
    6967        std::vector<Value *> args;
    70         args.push_back(getInt32(STDERR_FILENO));
     68        Value * const fdInt = &*(arg++);
     69        args.push_back(fdInt);
    7170        args.push_back(GetString(out.str().c_str()));
    7271        Value * const name = &*(arg++);
     
    7776        Type * const byteVectorType = VectorType::get(getInt8Ty(), (mBitBlockWidth / 8));
    7877        value = builder.CreateBitCast(value, byteVectorType);
    79         for(unsigned i = (mBitBlockWidth / 8); i != 0; --i) {
     78        for(unsigned i = (getBitBlockWidth() / 8); i != 0; --i) {
    8079            args.push_back(builder.CreateZExt(builder.CreateExtractElement(value, builder.getInt32(i - 1)), builder.getInt32Ty()));
    8180        }
     
    8483        printRegister = function;
    8584    }
    86     CreateCall(printRegister, {GetString(name.c_str()), CreateBitCast(value, mBitBlockType)});
     85    CreateCall(printRegister, {getInt32(static_cast<uint32_t>(fd)), GetString(name.c_str()), CreateBitCast(value, getBitBlockType())});
    8786}
    8887
     
    285284}
    286285
    287 Value * IDISA_Builder::mvmd_sll(unsigned fw, Value * value, Value * shift) {
    288     VectorType * const vecTy = cast<VectorType>(value->getType());
     286Value * IDISA_Builder::mvmd_sll(unsigned fw, Value * value, Value * shift, const bool safe) {
     287    VectorType * const vecTy = fwVectorType(fw);
    289288    IntegerType * const intTy = getIntNTy(vecTy->getBitWidth());
    290     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    291         Type * const ty = shift->getType();
    292         Value * const scaled = CreateMul(shift, ConstantInt::get(ty, fw));
    293         Value * const inbounds = CreateICmpULT(scaled, ConstantInt::get(ty, vecTy->getBitWidth()));
    294         CreateAssert(inbounds, "poison shift value: >= vector width");
    295     }
     289    Constant * const FIELD_WIDTH = ConstantInt::get(shift->getType(), fw);
     290    Constant * const BLOCK_WIDTH = ConstantInt::get(shift->getType(), vecTy->getBitWidth());
     291    shift = CreateMul(shift, FIELD_WIDTH);
     292//    if (LLVM_UNLIKELY(safe && codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     293//        Value * const inbounds = CreateICmpULT(shift, BLOCK_WIDTH);
     294//        CreateAssert(inbounds, "poison shift value: >= vector width");
     295//    }
     296    Value * result = nullptr;
    296297    value = CreateBitCast(value, intTy);
    297     shift = CreateZExtOrTrunc(CreateMul(shift, ConstantInt::get(shift->getType(), fw)), intTy);
    298     return CreateBitCast(CreateShl(value, shift), vecTy);
     298//    if (safe) {
     299        shift = CreateZExtOrTrunc(shift, intTy);
     300        result = CreateShl(value, shift);
     301//    } else {
     302//        // TODO: check the ASM generated by this to see what the select generates
     303//        Value * const moddedShift = CreateURem(shift, BLOCK_WIDTH);
     304//        Value * const inbounds = CreateICmpEQ(moddedShift, shift);
     305//        shift = CreateZExtOrTrunc(moddedShift, intTy);
     306//        Constant * const ZEROES = Constant::getNullValue(intTy);
     307//        result = CreateShl(value, shift);
     308//        result = CreateSelect(inbounds, result, ZEROES);
     309//    }
     310    return CreateBitCast(result, vecTy);
    299311}
    300312
     
    313325}
    314326
    315 Value * IDISA_Builder::mvmd_srl(unsigned fw, Value * value, Value * shift) {
    316     VectorType * const vecTy = cast<VectorType>(value->getType());
     327Value * IDISA_Builder::mvmd_srl(unsigned fw, Value * value, Value * shift, const bool safe) {
     328    VectorType * const vecTy = fwVectorType(fw);
    317329    IntegerType * const intTy = getIntNTy(vecTy->getBitWidth());
    318     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    319         Type * const ty = shift->getType();
    320         Value * const scaled = CreateMul(shift, ConstantInt::get(ty, fw));
    321         Value * const inbounds = CreateICmpULT(scaled, ConstantInt::get(ty, vecTy->getBitWidth()));
    322         CreateAssert(inbounds, "poison shift value: >= vector width");
    323     }
     330    Constant * const FIELD_WIDTH = ConstantInt::get(shift->getType(), fw);
     331    Constant * const BLOCK_WIDTH = ConstantInt::get(shift->getType(), vecTy->getBitWidth());
     332    shift = CreateMul(shift, FIELD_WIDTH);
     333//    if (LLVM_UNLIKELY(safe && codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     334//        Value * const inbounds = CreateICmpULT(shift, BLOCK_WIDTH);
     335//        CreateAssert(inbounds, "poison shift value: >= vector width");
     336//    }
     337    Value * result = nullptr;
    324338    value = CreateBitCast(value, intTy);
    325     shift = CreateZExtOrTrunc(CreateMul(shift, ConstantInt::get(shift->getType(), fw)), intTy);
    326     return CreateBitCast(CreateLShr(value, shift), vecTy);
     339//    if (safe) {
     340        shift = CreateZExtOrTrunc(shift, intTy);
     341        result = CreateLShr(value, shift);
     342//    } else {
     343//        // TODO: check the ASM generated by this to see what the select generates
     344//        Value * const moddedShift = CreateURem(shift, BLOCK_WIDTH);
     345//        Value * const inbounds = CreateICmpEQ(moddedShift, shift);
     346//        shift = CreateZExtOrTrunc(moddedShift, intTy);
     347//        Constant * const ZEROES = Constant::getNullValue(intTy);
     348//        result = CreateLShr(value, shift);
     349//        result = CreateSelect(inbounds, result, ZEROES);
     350//    }
     351    return CreateBitCast(result, vecTy);
    327352}
    328353
     
    787812}
    788813
    789 Value * IDISA_Builder::bitblock_mask_from(Value * pos) {
    790     Value * p = CreateZExtOrTrunc(pos, getSizeTy());
    791     const unsigned fw = getSizeTy()->getBitWidth();
    792     const auto field_count = mBitBlockWidth / fw;
    793     Constant * fwVal = ConstantInt::get(getSizeTy(), fw);
    794     Constant * poaBase[field_count];
    795     for (unsigned i = 0; i < field_count; i++) {
    796         poaBase[i] = ConstantInt::get(getSizeTy(), fw * i);
    797     }
    798     Value * posBaseVec = ConstantVector::get({poaBase, field_count});
    799     Value * mask1 = CreateSExt(CreateICmpUGT(posBaseVec, simd_fill(fw, pos)), fwVectorType(fw));
    800     Value * bitField = CreateShl(ConstantInt::getAllOnesValue(getSizeTy()), CreateURem(p, fwVal));
    801     Value * inBitBlock = CreateICmpULT(p, getSize(mBitBlockWidth));
    802     Value * fieldNo = CreateUDiv(p, fwVal);
    803     Value * const final_mask = CreateSelect(inBitBlock, CreateInsertElement(mask1, bitField, fieldNo), mask1);
    804     return bitCast(final_mask);
    805 }
    806 
    807 Value * IDISA_Builder::bitblock_set_bit(Value * pos) {
    808     Value * p = CreateZExtOrTrunc(pos, getSizeTy());
    809     const unsigned fw = getSizeTy()->getBitWidth();
    810     Constant * fwVal = ConstantInt::get(getSizeTy(), fw);
    811     Value * bitField = CreateShl(ConstantInt::get(getSizeTy(), 1), CreateURem(p, fwVal));
    812     Value * fieldNo = CreateUDiv(p, fwVal);
    813     return bitCast(CreateInsertElement(Constant::getNullValue(fwVectorType(fw)), bitField, fieldNo));
     814Value * IDISA_Builder::bitblock_mask_from(Value * const position, const bool safe) {
     815    Value * const originalPos = CreateZExtOrTrunc(position, getSizeTy());
     816    if (LLVM_UNLIKELY(safe && codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     817        Constant * const BLOCK_WIDTH = getSize(mBitBlockWidth);
     818        CreateAssert(CreateICmpULT(originalPos, BLOCK_WIDTH), "position exceeds block width");
     819    }
     820    Value * const pos = safe ? position : CreateAnd(originalPos, getSize(mBitBlockWidth - 1));
     821    const unsigned fieldWidth = getSizeTy()->getBitWidth();
     822    const auto fieldCount = mBitBlockWidth / fieldWidth;
     823    Constant * posBase[fieldCount];
     824    for (unsigned i = 0; i < fieldCount; i++) {
     825        posBase[i] = ConstantInt::get(getSizeTy(), fieldWidth * i);
     826    }
     827    Value * const posBaseVec = ConstantVector::get({posBase, fieldCount});
     828    Value * const positionVec = simd_fill(fieldWidth, pos);
     829    Value * const fullFieldWidthMasks = CreateSExt(CreateICmpUGT(posBaseVec, positionVec), fwVectorType(fieldWidth));
     830    Constant * const FIELD_ONES = ConstantInt::getAllOnesValue(getSizeTy());
     831    Value * const bitField = CreateShl(FIELD_ONES, CreateAnd(pos, getSize(fieldWidth - 1)));
     832    Value * const fieldNo = CreateLShr(pos, getSize(std::log2(fieldWidth)));   
     833    Value * result = CreateInsertElement(fullFieldWidthMasks, bitField, fieldNo);
     834    if (!safe) { // if the originalPos doesn't match the moddedPos then the originalPos must exceed the block width.
     835        Constant * const VECTOR_ZEROES = Constant::getNullValue(fwVectorType(fieldWidth));
     836        result = CreateSelect(CreateICmpEQ(originalPos, pos), result, VECTOR_ZEROES);
     837    }
     838    return bitCast(result);
     839}
     840
     841Value * IDISA_Builder::bitblock_mask_to(Value * const position, const bool safe) {
     842    Value * const originalPos = CreateZExtOrTrunc(position, getSizeTy());
     843    if (LLVM_UNLIKELY(safe && codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     844        Constant * const BLOCK_WIDTH = getSize(mBitBlockWidth);
     845        CreateAssert(CreateICmpULT(originalPos, BLOCK_WIDTH), "position exceeds block width");
     846    }
     847    Value * const pos = safe ? position : CreateAnd(originalPos, getSize(mBitBlockWidth - 1));
     848    const unsigned fieldWidth = getSizeTy()->getBitWidth();
     849    const auto fieldCount = mBitBlockWidth / fieldWidth;
     850    Constant * posBase[fieldCount];
     851    for (unsigned i = 0; i < fieldCount; i++) {
     852        posBase[i] = ConstantInt::get(getSizeTy(), fieldWidth * i);
     853    }
     854    Value * const posBaseVec = ConstantVector::get({posBase, fieldCount});
     855    Value * const positionVec = simd_fill(fieldWidth, pos);
     856    Value * const fullFieldWidthMasks = CreateSExt(CreateICmpULT(posBaseVec, positionVec), fwVectorType(fieldWidth));
     857    Constant * const FIELD_ONES = ConstantInt::getAllOnesValue(getSizeTy());
     858    Value * const bitField = CreateLShr(FIELD_ONES, CreateAnd(getSize(fieldWidth - 1), CreateNot(pos)));
     859    Value * const fieldNo = CreateLShr(pos, getSize(std::log2(fieldWidth)));
     860    Value * result = CreateInsertElement(fullFieldWidthMasks, bitField, fieldNo);
     861    if (!safe) { // if the originalPos doesn't match the moddedPos then the originalPos must exceed the block width.
     862        Constant * const VECTOR_ONES = Constant::getAllOnesValue(fwVectorType(fieldWidth));
     863        result = CreateSelect(CreateICmpEQ(originalPos, pos), result, VECTOR_ONES);
     864    }
     865    return bitCast(result);
     866}
     867
     868Value * IDISA_Builder::bitblock_set_bit(Value * const position, const bool safe) {
     869    Value * const originalPos = CreateZExtOrTrunc(position, getSizeTy());
     870    if (LLVM_UNLIKELY(safe && codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     871        Constant * const BLOCK_WIDTH = getSize(mBitBlockWidth);
     872        CreateAssert(CreateICmpULT(originalPos, BLOCK_WIDTH), "position exceeds block width");
     873    }
     874    const unsigned fieldWidth = getSizeTy()->getBitWidth();
     875    Value * const bitField = CreateShl(getSize(1), CreateAnd(originalPos, getSize(fieldWidth - 1)));
     876    Value * const pos = safe ? position : CreateAnd(originalPos, getSize(mBitBlockWidth - 1));
     877    Value * const fieldNo = CreateLShr(pos, getSize(std::log2(fieldWidth)));
     878    Constant * const VECTOR_ZEROES = Constant::getNullValue(fwVectorType(fieldWidth));
     879    Value * result = CreateInsertElement(VECTOR_ZEROES, bitField, fieldNo);
     880    if (!safe) { // If the originalPos doesn't match the moddedPos then the originalPos must exceed the block width.
     881        result = CreateSelect(CreateICmpEQ(originalPos, pos), result, VECTOR_ZEROES);
     882    }
     883    return bitCast(result);
    814884}
    815885
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r6111 r6184  
    155155    virtual llvm::Value * mvmd_insert(unsigned fw, llvm::Value * blk, llvm::Value * elt, unsigned fieldIndex);
    156156
    157     virtual llvm::Value * mvmd_sll(unsigned fw, llvm::Value * value, llvm::Value * shift);
    158     virtual llvm::Value * mvmd_srl(unsigned fw, llvm::Value * value, llvm::Value * shift);
     157    virtual llvm::Value * mvmd_sll(unsigned fw, llvm::Value * value, llvm::Value * shift, const bool safe = false);
     158    virtual llvm::Value * mvmd_srl(unsigned fw, llvm::Value * value, llvm::Value * shift, const bool safe = false);
    159159    virtual llvm::Value * mvmd_slli(unsigned fw, llvm::Value * a, unsigned shift);
    160160    virtual llvm::Value * mvmd_srli(unsigned fw, llvm::Value * a, unsigned shift);
     
    172172    virtual std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift);
    173173    virtual std::pair<llvm::Value *, llvm::Value *> bitblock_indexed_advance(llvm::Value * a, llvm::Value * index_strm, llvm::Value * shiftin, unsigned shift);
    174     virtual llvm::Value * bitblock_mask_from(llvm::Value * pos);
    175     virtual llvm::Value * bitblock_set_bit(llvm::Value * pos);
     174    virtual llvm::Value * bitblock_mask_from(llvm::Value * const position, const bool safe = false);
     175    virtual llvm::Value * bitblock_mask_to(llvm::Value * const position, const bool safe = false);
     176    virtual llvm::Value * bitblock_set_bit(llvm::Value * const position, const bool safe = false);
    176177
    177178    // returns a scalar with the popcount of this block
     
    206207    }
    207208
    208     void CallPrintRegisterCond(const std::string & regName, llvm::Value * const value, llvm::Value * const cond);
    209     void CallPrintRegister(const std::string & regName, llvm::Value * const value);
     209    void CallPrintRegisterCond(const std::string & regName, llvm::Value * const value, llvm::Value * const cond, const STD_FD fd = STD_FD::STD_ERR);
     210    void CallPrintRegister(const std::string & regName, llvm::Value * const value, const STD_FD fd = STD_FD::STD_ERR);
    210211
    211212protected:
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp

    r5841 r6184  
    2828}
    2929
    30 Value * IDISA_NVPTX20_Builder::bitblock_mask_from(Value * pos){
     30Value * IDISA_NVPTX20_Builder::bitblock_mask_from(Value * pos, const bool safe){
    3131    Type * const int64ty = getInt64Ty();
    3232    Value * id = CreateCall(tidFunc);
     
    4040}
    4141
    42 Value * IDISA_NVPTX20_Builder::bitblock_set_bit(Value * pos){
     42Value * IDISA_NVPTX20_Builder::bitblock_set_bit(Value * pos, const bool safe){
    4343    Type * const int64ty = getInt64Ty();
    4444    Value * id = CreateCall(tidFunc);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.h

    r6107 r6184  
    3838    std::pair<llvm::Value *, llvm::Value *> bitblock_add_with_carry(llvm::Value * a, llvm::Value * b, llvm::Value * carryin) override;
    3939    virtual std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift) override;
    40     llvm::Value * bitblock_mask_from(llvm::Value * pos) override;
    41     llvm::Value * bitblock_set_bit(llvm::Value * pos) override;
     40    llvm::Value * bitblock_mask_from(llvm::Value * pos, const bool safe) override;
     41    llvm::Value * bitblock_set_bit(llvm::Value * pos, const bool safe) override;
    4242
    4343    llvm::Value * getEOFMask(llvm::Value * remainingBytes);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r6113 r6184  
    189189        Value * delete_marks_lo = CreateAnd(CreateNot(selector), ConstantInt::get(selector->getType(), 3));
    190190        Value * delCount_lo = CreateSub(delete_marks_lo, CreateLShr(delete_marks_lo, 1));
    191         return mvmd_srl(32, centralResult, delCount_lo);
     191        return mvmd_srl(32, centralResult, delCount_lo, true);
    192192    }
    193193    return IDISA_Builder::mvmd_compress(fw, a, selector);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.h

    r6077 r6184  
    77#define IDISA_TARGET_H
    88
     9#include <llvm/Support/Compiler.h>
     10
    911namespace llvm { class LLVMContext; }
    1012namespace kernel { class KernelBuilder; }
    1113
    12 extern bool AVX2_available();
    13 extern bool AVX512BW_available();
     14extern LLVM_READNONE bool AVX2_available();
     15extern LLVM_READNONE bool AVX512BW_available();
    1416
    1517namespace IDISA {
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r6175 r6184  
    1212#include <sstream>
    1313#include <llvm/Support/ErrorHandling.h>
     14#include <toolchain/cpudriver.h>
    1415#include <grep/grep_engine.h>
    1516#include <util/aligned_allocator.h>
     
    1718#include <re/re_cc.h>
    1819#include <codecvt>
     20
    1921
    2022using namespace llvm;
     
    9698   
    9799    PropertyValueAccumulator accum(accumulatedValues);
    98    
    99     grep::InternalSearchEngine engine;
     100    CPUDriver driver("driver");
     101    grep::InternalSearchEngine engine(driver);
    100102    engine.setRecordBreak(grep::GrepRecordBreakKind::LF);
    101     engine.grepCodeGen(pattern, nullptr, & accum);
    102     engine.doGrep(aligned, n);
     103    engine.grepCodeGen(pattern, nullptr);
     104    engine.doGrep(aligned, n, accum);
    103105    //grepBuffer(pattern, aligned, n, & accum);
    104106    alloc.deallocate(aligned, 0);
    105107   
    106108    UnicodeSet a;
    107     for (const auto & v : accumulatedValues) {
    108        
    109         int e = GetPropertyValueEnumCode(v);
     109    for (const auto & v : accumulatedValues) {       
     110        const auto e = GetPropertyValueEnumCode(v);
    110111        a.insert(GetCodepointSet(e));
    111112    }
     
    239240   
    240241    PropertyValueAccumulator accum(accumulatedValues);
    241     grep::InternalSearchEngine engine;
     242    CPUDriver driver("driver");
     243    grep::InternalSearchEngine engine(driver);
    242244    engine.setRecordBreak(grep::GrepRecordBreakKind::LF);
    243     engine.grepCodeGen(pattern, nullptr, & accum);
    244     engine.doGrep(aligned, n);
     245    engine.grepCodeGen(pattern, nullptr);
     246    engine.doGrep(aligned, n, accum);
    245247    alloc.deallocate(aligned, 0);
    246248   
     
    364366const UnicodeSet NumericPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
    365367    SetByLineNumberAccumulator accum(mExplicitCps, mNaNCodepointSet);
    366     grep::InternalSearchEngine engine;
     368    CPUDriver driver("driver");
     369    grep::InternalSearchEngine engine(driver);
    367370    engine.setRecordBreak(grep::GrepRecordBreakKind::LF);
    368     engine.grepCodeGen(pattern, nullptr, & accum);
    369     engine.doGrep(mStringBuffer, mBufSize);
     371    engine.grepCodeGen(pattern, nullptr);
     372    engine.doGrep(mStringBuffer, mBufSize, accum);
    370373    //grepBuffer(pattern, mStringBuffer, mBufSize, &accum);
    371374    return accum.getAccumulatedSet();
     
    407410    }
    408411    SetByLineNumberAccumulator accum(mExplicitCps, mNullCodepointSet);
    409     grep::InternalSearchEngine engine;
     412    CPUDriver driver("driver");
     413    grep::InternalSearchEngine engine(driver);
    410414    engine.setRecordBreak(grep::GrepRecordBreakKind::LF);
    411     engine.grepCodeGen(pattern, nullptr, & accum);
     415    engine.grepCodeGen(pattern, nullptr);
    412416    const unsigned bufSize = mStringOffsets[mExplicitCps.size()];
    413     engine.doGrep(mStringBuffer, bufSize);
     417    engine.doGrep(mStringBuffer, bufSize, accum);
    414418    matched.insert(accum.getAccumulatedSet());
    415419    return matched;
     
    466470    UnicodeSet base_set = mBaseObject.GetCodepointSetMatchingPattern(pattern) - mOverriddenSet;
    467471    SetByLineNumberAccumulator accum(mExplicitCps, UnicodeSet());
    468     grep::InternalSearchEngine engine;
     472    CPUDriver driver("driver");
     473    grep::InternalSearchEngine engine(driver);
    469474    engine.setRecordBreak(grep::GrepRecordBreakKind::LF);
    470     engine.grepCodeGen(pattern, nullptr, & accum);
     475    engine.grepCodeGen(pattern, nullptr);
    471476    const unsigned bufSize = mStringOffsets[mExplicitCps.size()];
    472     engine.doGrep(mStringBuffer, bufSize);
     477    engine.doGrep(mStringBuffer, bufSize, accum);
    473478    base_set.insert(accum.getAccumulatedSet());
    474479    return base_set;
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.h

    r6175 r6184  
    5151};
    5252
    53 class BinaryPropertyObject : public PropertyObject {
     53class BinaryPropertyObject final : public PropertyObject {
    5454public:
    5555    static inline bool classof(const PropertyObject * p) {
     
    7777};
    7878
    79 class EnumeratedPropertyObject : public PropertyObject {
     79class EnumeratedPropertyObject final : public PropertyObject {
    8080public:
    8181    static inline bool classof(const PropertyObject * p) {
     
    131131};
    132132
    133 class ExtensionPropertyObject : public PropertyObject {
     133class ExtensionPropertyObject final : public PropertyObject {
    134134public:
    135135    static inline bool classof(const PropertyObject * p) {
     
    165165};
    166166
    167 class NumericPropertyObject : public PropertyObject {
     167class NumericPropertyObject final : public PropertyObject {
    168168public:
    169169    static inline bool classof(const PropertyObject * p) {
     
    193193};
    194194
    195 class StringPropertyObject : public PropertyObject {
     195class StringPropertyObject final : public PropertyObject {
    196196public:
    197197    static inline bool classof(const PropertyObject * p) {
     
    227227};
    228228   
    229 class StringOverridePropertyObject : public PropertyObject {
     229class StringOverridePropertyObject final : public PropertyObject {
    230230public:
    231231    static inline bool classof(const PropertyObject * p) {
     
    262262};
    263263   
    264 class ObsoletePropertyObject : public PropertyObject {
     264class ObsoletePropertyObject final : public PropertyObject {
    265265public:
    266266    static inline bool classof(const PropertyObject * p) {
     
    279279};
    280280
    281 class UnsupportedPropertyObject : public PropertyObject {
     281class UnsupportedPropertyObject final : public PropertyObject {
    282282public:
    283283    static inline bool classof(const PropertyObject * p) {
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r6047 r6184  
    2525#include <fcntl.h>
    2626#include <mutex>
     27#include <kernels/pipeline_builder.h>
    2728
    2829using namespace llvm;
     30using namespace codegen;
    2931
    3032static cl::OptionCategory base64Options("base64 Options",
     
    3739static cl::opt<int> Threads("threads", cl::desc("Total number of threads."), cl::init(1));
    3840
     41using namespace kernel;
    3942
    40 using namespace kernel;
    41 using namespace parabix;
     43typedef void (*base64FunctionType)(const uint32_t fd);
    4244
    43 void base64PipelineGen(ParabixDriver & pxDriver) {
    44        
     45base64FunctionType base64PipelineGen(CPUDriver & pxDriver) {
    4546    auto & iBuilder = pxDriver.getBuilder();
    46     Module * mod = iBuilder->getModule();
    47     Type * bitBlockType = iBuilder->getBitBlockType();
    48 
    49     Type * const voidTy = iBuilder->getVoidTy();
    5047    Type * const int32Ty = iBuilder->getInt32Ty();
    51     Type * const outputType = PointerType::get(ArrayType::get(ArrayType::get(bitBlockType, 8), 1), 0);
    52    
    53    
    54     Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, int32Ty, outputType, nullptr));
    55     main->setCallingConv(CallingConv::C);
    56     auto args = main->arg_begin();
    57    
    58     Value * const fileDescriptor = &*(args++);
    59     fileDescriptor->setName("fileDescriptor");
    60     Value * const outputStream = &*(args++);
    61     outputStream->setName("outputStream");
    62     iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
    63 
    64     //Round up to a multiple of 3.
    65     const auto bufferSize = (codegen::SegmentSize * codegen::BufferSegments);
    66     const auto expandedSize = boost::lcm(boost::lcm(bufferSize, 3U), 4U);
    67 
    68     StreamSetBuffer * ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    69     Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
    70     mmapK->setInitialArguments({fileDescriptor});
    71     pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    72    
    73     StreamSetBuffer * Expanded3_4Out = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), expandedSize);
    74     Kernel * expandK = pxDriver.addKernelInstance<expand3_4Kernel>(iBuilder);
    75     pxDriver.makeKernelCall(expandK, {ByteStream}, {Expanded3_4Out});
    76    
    77     StreamSetBuffer * Radix64out = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), bufferSize);
    78     Kernel * radix64K = pxDriver.addKernelInstance<radix64Kernel>(iBuilder);
    79     pxDriver.makeKernelCall(radix64K, {Expanded3_4Out}, {Radix64out});
    80    
    81     if (memAlignBuffering){
    82         auto Base64out = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), outputStream);
    83         Kernel * base64K = pxDriver.addKernelInstance<base64Kernel>(iBuilder);
    84         pxDriver.makeKernelCall(base64K, {Radix64out}, {Base64out});
    85     } else {
    86         StreamSetBuffer * Base64out = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), bufferSize);
    87         Kernel * base64K = pxDriver.addKernelInstance<base64Kernel>(iBuilder);
    88         pxDriver.makeKernelCall(base64K, {Radix64out}, {Base64out});       
    89         Kernel * outK = pxDriver.addKernelInstance<StdOutKernel>(iBuilder, 8);
    90         pxDriver.makeKernelCall(outK, {Base64out}, {});
    91     }
    92    
    93     pxDriver.generatePipelineIR();
    94     pxDriver.deallocateBuffers();
    95     iBuilder->CreateRetVoid();
    96 
    97     pxDriver.finalizeObject();
     48    auto P = pxDriver.makePipeline({Binding{int32Ty, "fd"}});
     49    Scalar * const fileDescriptor = P->getInputScalar("fd");
     50    StreamSet * const ByteStream = P->CreateStreamSet(1, 8);
     51    P->CreateKernelCall<MMapSourceKernel>(fileDescriptor, ByteStream);
     52    StreamSet * const Expanded3_4Out = P->CreateStreamSet(1, 8);
     53    P->CreateKernelCall<expand3_4Kernel>(ByteStream, Expanded3_4Out);
     54    StreamSet * const Radix64out = P->CreateStreamSet(1, 8);
     55    P->CreateKernelCall<radix64Kernel>(Expanded3_4Out, Radix64out);
     56    StreamSet * const base64 = P->CreateStreamSet(1, 8);
     57    P->CreateKernelCall<base64Kernel>(Radix64out, base64);
     58    P->CreateKernelCall<StdOutKernel>(base64);
     59    return reinterpret_cast<base64FunctionType>(P->compile());
    9860}
    99 
    100 typedef void (*base64FunctionType)(const uint32_t fd, char * outputBuffer);
    10161
    10262size_t file_size(const int fd) {
     
    10969
    11070void base64(base64FunctionType fn_ptr, const std::string & fileName) {
    111 
    11271    const int fd = open(fileName.c_str(), O_RDONLY);
    11372    if (LLVM_UNLIKELY(fd == -1)) {
     
    11574        return;
    11675    }
    117     if (mMapBuffering) {
    118         boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2 * file_size(fd)));
    119         outputBuffer.advise(boost::interprocess::mapped_region::advice_willneed);
    120         outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
    121         fn_ptr(fd, static_cast<char*>(outputBuffer.get_address()));
    122     } else if (memAlignBuffering) {
    123         unsigned inputSize = file_size(fd);
    124         unsigned paddingSize = (inputSize % 3) ? (4 - (inputSize % 3)) : 0;
    125         unsigned outputSize = inputSize * 4/3 + paddingSize;
    126 
    127         char * outputBuffer;
    128         if (posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, inputSize * 2)) {
    129             throw std::bad_alloc();
    130         }
    131         fn_ptr(fd, outputBuffer);
    132         fwrite(outputBuffer, outputSize, 1, stdout);
    133         free(reinterpret_cast<void *>(outputBuffer));
    134     } else { /* No external output buffer */
    135         fn_ptr(fd, nullptr);
    136     }
    137     close(fd);
    138    
    139 }
    140 
    141 std::mutex count_mutex;
    142 size_t fileCount;
    143 base64FunctionType fn_ptr;
    144 
    145 std::vector<char *> resultStrs;
    146 std::vector<int> filesizes;
    147 
    148 void *Base64ThreadFunction(void *args)
    149 {
    150     size_t fileIdx;
    151 
    152     count_mutex.lock();
    153     fileIdx = fileCount;
    154     fileCount++;
    155     count_mutex.unlock();
    156 
    157     while (fileIdx < inputFiles.size()) {
    158         const int fd = open(inputFiles[fileIdx].c_str(), O_RDONLY);
    159         if (LLVM_UNLIKELY(fd == -1)) {
    160             std::cerr << "Error: cannot open " << inputFiles[fileIdx] << " for processing. Skipped.\n";
    161             exit(-1);
    162         }
    163 
    164         char * outputBuffer;
    165         if (posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2 * file_size(fd))) {
    166             throw std::bad_alloc();
    167         }
    168    
    169         fn_ptr(fd, outputBuffer);
    170         resultStrs[fileIdx] = outputBuffer;
    171         filesizes[fileIdx] = file_size(fd);
    172 
    173         count_mutex.lock();
    174         fileIdx = fileCount;
    175         fileCount++;
    176         count_mutex.unlock();
    177     }
    178 
    179     pthread_exit(nullptr);
     76    fn_ptr(fd);
     77    close(fd);   
    18078}
    18179
     
    18381    codegen::ParseCommandLineOptions(argc, argv, {&base64Options, codegen::codegen_flags()});
    18482
    185     if (Threads == 1) {
    186         ParabixDriver pxDriver("base64");
    187         base64PipelineGen(pxDriver);
    188         fn_ptr = reinterpret_cast<base64FunctionType>(pxDriver.getMain());     
    189         for (unsigned i = 0; i != inputFiles.size(); ++i) {
    190             base64(fn_ptr, inputFiles[i]);
    191         }
     83    CPUDriver pxDriver("base64");
     84    auto fn_ptr = base64PipelineGen(pxDriver);
     85    for (unsigned i = 0; i != inputFiles.size(); ++i) {
     86        base64(fn_ptr, inputFiles[i]);
    19287    }
    193     else{
    194         memAlignBuffering = true;
    195         ParabixDriver pxDriver("base64");
    196         base64PipelineGen(pxDriver);
    197         fn_ptr = reinterpret_cast<base64FunctionType>(pxDriver.getMain());
    198 
    199         fileCount = 0;
    200         const unsigned n = inputFiles.size();
    201         resultStrs.resize(n);
    202         filesizes.resize(n);
    203 
    204         const unsigned numOfThreads = Threads;
    205         pthread_t threads[numOfThreads];
    206 
    207         for(unsigned long i = 0; i < numOfThreads; ++i){
    208             const int rc = pthread_create(&threads[i], NULL, Base64ThreadFunction, (void *)i);
    209             if (rc) {
    210                 llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
    211             }
    212         }
    213 
    214         for(unsigned i = 0; i < numOfThreads; ++i) {
    215             void * status = nullptr;
    216             const int rc = pthread_join(threads[i], &status);
    217             if (rc) {
    218                 llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
    219             }
    220         }
    221 
    222         for (unsigned i=0; i<resultStrs.size(); i++){
    223             unsigned paddingSize = (filesizes[i] % 3) ? (4 - (filesizes[i] % 3)) : 0;
    224             fwrite(resultStrs[i], filesizes[i] * 4/3 + paddingSize, 1, stdout);
    225         }
    226     }   
    227 
    22888    return 0;
    22989}
  • icGREP/icgrep-devel/icgrep/cc/alphabet.cpp

    r5800 r6184  
    1111namespace cc {
    1212   
    13 UnicodeMappableAlphabet::UnicodeMappableAlphabet(std::string alphabetName,
     13UnicodeMappableAlphabet::UnicodeMappableAlphabet(const std::string alphabetName,
    1414                                                 unsigned unicodeCommon,
    15                                                  std::vector <UCD::codepoint_t> aboveCommon) :
    16     Alphabet(alphabetName, ClassTypeId::UnicodeMappableAlphabet),
    17     mUnicodeCommon(unicodeCommon),
    18     mAboveCommon(aboveCommon) {}
     15                                                 std::vector <UCD::codepoint_t> aboveCommon)
     16: Alphabet(std::move(alphabetName), ClassTypeId::UnicodeMappableAlphabet),
     17mUnicodeCommon(unicodeCommon),
     18mAboveCommon(std::move(aboveCommon)) {
     19
     20}
    1921
    2022UCD::codepoint_t UnicodeMappableAlphabet::toUnicode(const unsigned n) const {
     
    3436}
    3537
    36 CodeUnitAlphabet::CodeUnitAlphabet(std::string alphabetName, uint8_t bits) :
    37     Alphabet(alphabetName, ClassTypeId::CodeUnitAlphabet), mCodeUnitBits(bits) {}
     38CodeUnitAlphabet::CodeUnitAlphabet(const std::string alphabetName, uint8_t bits) :
     39Alphabet(std::move(alphabetName), ClassTypeId::CodeUnitAlphabet)
     40, mCodeUnitBits(bits) {
     41
     42}
    3843
    3944const UnicodeMappableAlphabet Unicode("Unicode", UCD::UNICODE_MAX, {});
  • icGREP/icgrep-devel/icgrep/cc/alphabet.h

    r6168 r6184  
    3838    virtual ~Alphabet() {}
    3939protected:
    40     Alphabet(std::string name, ClassTypeId k) : mAlphabetName(name), mClassTypeId(k) {}
     40    Alphabet(const std::string && name, ClassTypeId k) : mAlphabetName(std::move(name)), mClassTypeId(k) {}
    4141private:
    4242    const std::string mAlphabetName;
     
    4444};
    4545
    46 class UnicodeMappableAlphabet : public Alphabet {
     46class UnicodeMappableAlphabet final : public Alphabet {
    4747public:
    4848    //  Alphabets may be formed by some subset of Unicode characters, together
     
    5353    //  character codes (if any) above unicodeCommon - 1.
    5454   
    55     UnicodeMappableAlphabet(std::string alphabetName,
     55    UnicodeMappableAlphabet(const std::string alphabetName,
    5656                            unsigned unicodeCommon,
    5757                            std::vector <UCD::codepoint_t> aboveCommon);
     
    7070
    7171protected:
    72     UCD::codepoint_t mCharSet;
    73     UCD::codepoint_t mUnicodeCommon;
    74     std::vector <UCD::codepoint_t> mAboveCommon;
     72    const UCD::codepoint_t mUnicodeCommon;
     73    const std::vector<UCD::codepoint_t> mAboveCommon;
    7574};
    7675
    77 class CodeUnitAlphabet : public Alphabet {
     76class CodeUnitAlphabet final : public Alphabet {
    7877public:
    79     CodeUnitAlphabet(std::string alphabetName, uint8_t codeUnitBits);
     78    CodeUnitAlphabet(const std::string alphabetName, uint8_t codeUnitBits);
    8079    static inline bool classof(const Alphabet * a) {
    8180        return a->getClassTypeId() == ClassTypeId::CodeUnitAlphabet;
     
    8685
    8786private:
    88     uint8_t mCodeUnitBits;
     87    const uint8_t mCodeUnitBits;
    8988};
    9089
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.h

    r6133 r6184  
    1111#include <re/re_cc.h>
    1212#include <pablo/builder.hpp>
    13 #include <kernels/interface.h>
    14 #include <string>
    1513#include <cc/alphabet.h>
    1614
  • icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.cpp

    r6140 r6184  
    3131//
    3232
    33 std::map<UCD::codepoint_t, boost::dynamic_bitset<>> computeBreakpoints(const std::vector<re::CC *> & CCs) {
    34     std::map<UCD::codepoint_t, boost::dynamic_bitset<>> breakpoints;
     33using bitset = boost::dynamic_bitset<>;
     34
     35std::map<UCD::codepoint_t, bitset> computeBreakpoints(const std::vector<re::CC *> & CCs) {
     36    std::map<UCD::codepoint_t, bitset> breakpoints;
    3537    for (unsigned i = 0; i < CCs.size(); i++) {
    3638        for (const auto range : *CCs[i]) {
     
    3941            auto f = breakpoints.find(lo);
    4042            if (f == breakpoints.end()) {
    41                 breakpoints.emplace(lo, boost::dynamic_bitset<>(CCs.size()));
     43                breakpoints.emplace(lo, bitset(CCs.size()));
    4244            }
    4345            breakpoints[lo].set(i);
    4446            f = breakpoints.find(hi + 1);
    4547            if (f == breakpoints.end()) {
    46                 breakpoints.emplace(hi+1, boost::dynamic_bitset<>(CCs.size()));
     48                breakpoints.emplace(hi+1, bitset(CCs.size()));
    4749            }
    4850            breakpoints[hi+1].set(i);
     
    6567    // Set up a map from the set of source CCs for each exclusive set to the exclusive set index.
    6668
    67     std::map<boost::dynamic_bitset<>, unsigned> CC_set_to_exclusive_set_map;
     69    std::map<bitset, unsigned> CC_set_to_exclusive_set_map;
    6870
    6971    // Entry 0 is for the characters not in any of the CCs.
    70     CC_set_to_exclusive_set_map.emplace(boost::dynamic_bitset<>(CCs.size()), 0);
     72    CC_set_to_exclusive_set_map.emplace(bitset(CCs.size()), 0);
    7173
    7274    unsigned current_exclusive_set_idx = 0;
    7375    unsigned multiplexed_bit_count = 0;
    74     boost::dynamic_bitset<> current_set(CCs.size());
     76    bitset current_set(CCs.size());
    7577   
    7678    unsigned range_lo = 0;
     
    113115
    114116
    115 MultiplexedAlphabet::MultiplexedAlphabet(std::string alphabetName, const std::vector<re::CC *> CCs)
    116     : Alphabet(alphabetName, ClassTypeId::MultiplexedAlphabet), mUnicodeSets(CCs) {
    117         if (CCs.size() > 0) {
    118             mSourceAlphabet = CCs[0]->getAlphabet();
    119             for (unsigned i = 1; i < CCs.size(); i++) {
    120                 if (CCs[i]->getAlphabet() != mSourceAlphabet) llvm::report_fatal_error("Mismatched source alphabets for Multiplexed Alphabet");
     117MultiplexedAlphabet::MultiplexedAlphabet(const std::string alphabetName, const std::vector<re::CC *> CCs)
     118: Alphabet(std::move(alphabetName), ClassTypeId::MultiplexedAlphabet)
     119, mUnicodeSets(std::move(CCs)) {
     120    if (!mUnicodeSets.empty()) {
     121        mSourceAlphabet = mUnicodeSets[0]->getAlphabet();
     122        for (unsigned i = 1; i < CCs.size(); i++) {
     123            if (CCs[i]->getAlphabet() != mSourceAlphabet) {
     124                llvm::report_fatal_error("Mismatched source alphabets for Multiplexed Alphabet");
    121125            }
    122126        }
    123         cc::doMultiplexCCs(CCs, mExclusiveSetIDs, mMultiplexedCCs);
     127    }
     128    doMultiplexCCs(CCs, mExclusiveSetIDs, mMultiplexedCCs);
    124129}
    125130
     
    145150    }
    146151
    147     const auto index = this->findTargetCCIndex(sourceCC);
     152    const auto index = findTargetCCIndex(sourceCC);
    148153    const auto exclusive_IDs = mExclusiveSetIDs[index];
    149154    re::CC * CC_union = re::makeCC(this);
  • icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.h

    r6140 r6184  
    66#define MULTIPLEX_CCS_H
    77
    8 #include <vector>
    98#include <cc/alphabet.h>
    109
     
    1312namespace cc {
    1413
    15 class MultiplexedAlphabet : public Alphabet {
     14class MultiplexedAlphabet final : public Alphabet {
    1615public:
    17     MultiplexedAlphabet(std::string alphabetName, const std::vector<re::CC *> CCs);
     16    MultiplexedAlphabet(const std::string alphabetName, const std::vector<re::CC *> CCs);
     17
    1818    static inline bool classof(const Alphabet * a) {
    1919        return a->getClassTypeId() == ClassTypeId::MultiplexedAlphabet;
     
    4646    unsigned long findTargetCCIndex(const re::CC * sourceCC) const;
    4747};
     48
    4849}
    4950
  • icGREP/icgrep-devel/icgrep/character_deletion.cpp

    r6090 r6184  
    3737
    3838using namespace llvm;
    39 using namespace parabix;
    4039using namespace kernel;
    4140
     
    5453 * */
    5554
    56 StreamSetBuffer * loadBasisBits(ParabixDriver & pxDriver, Value* inputStream, Value* fileSize, int inputBufferBlocks) {
     55StreamSetBuffer * loadBasisBits(CPUDriver & pxDriver, Value* inputStream, Value* fileSize, int inputBufferBlocks) {
    5756    auto & iBuilder = pxDriver.getBuilder();
    5857
     
    7069}
    7170
    72 StreamSetBuffer * generateSwizzledDeletion(ParabixDriver & pxDriver, StreamSetBuffer * BasisBits, int inputBufferBlocks) {
     71StreamSetBuffer * generateSwizzledDeletion(CPUDriver & pxDriver, StreamSetBuffer * BasisBits, int inputBufferBlocks) {
    7372    auto & iBuilder = pxDriver.getBuilder();
    7473
     
    9291
    9392// TODO: It seems that there are still some bugs in DeleteByPEXTkernel
    94 StreamSetBuffer * generateDeletion(ParabixDriver & pxDriver, StreamSetBuffer * BasisBits, int inputBufferBlocks) {
     93StreamSetBuffer * generateDeletion(CPUDriver & pxDriver, StreamSetBuffer * BasisBits, int inputBufferBlocks) {
    9594    auto & iBuilder = pxDriver.getBuilder();
    9695
     
    112111}
    113112
    114 StreamSetBuffer * generateDeletionByCompression(ParabixDriver & pxDriver, StreamSetBuffer * BasisBits, int inputBufferBlocks) {
     113StreamSetBuffer * generateDeletionByCompression(CPUDriver & pxDriver, StreamSetBuffer * BasisBits, int inputBufferBlocks) {
    115114    auto & iBuilder = pxDriver.getBuilder();
    116115
     
    118117    Kernel * ccK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "deletionMarker", std::vector<re::CC *>{re::subtractCC(re::makeByte(0, 255), re::makeCC(characterToBeDeleted))}, 8);
    119118    pxDriver.makeKernelCall(ccK, {BasisBits}, {CharacterMarkerBuffer});
     119
     120    #warning TODO: replace StreamFilterCompiler with the deposit method
    120121
    121122    StreamSetBuffer * compressedBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
     
    151152    const int inputBufferBlocks = codegen::BufferSegments * codegen::ThreadNum * 16;
    152153
    153     ParabixDriver pxDriver("character_deletion");
     154    CPUDriver pxDriver("character_deletion");
    154155    auto & iBuilder = pxDriver.getBuilder();
    155156    Module * M = iBuilder->getModule();
     
    189190    Kernel * outK = pxDriver.addKernelInstance<StdOutKernel>(iBuilder, 8);
    190191    pxDriver.makeKernelCall(outK, {deletedByteStream}, {});
    191 
    192192    pxDriver.generatePipelineIR();
    193 
    194     pxDriver.deallocateBuffers();
    195 
    196193    iBuilder->CreateRetVoid();
    197194
  • icGREP/icgrep-devel/icgrep/character_deposit.cpp

    r6072 r6184  
    3838
    3939using namespace llvm;
    40 using namespace parabix;
    4140using namespace kernel;
    4241
     
    6463 * */
    6564
    66 StreamSetBuffer * loadBasisBits(ParabixDriver & pxDriver, Value* inputStream, Value* fileSize, int bufferBlocks) {
     65StreamSetBuffer * loadBasisBits(CPUDriver & pxDriver, Value* inputStream, Value* fileSize, int bufferBlocks) {
    6766    auto & iBuilder = pxDriver.getBuilder();
    6867
     
    7978}
    8079
    81 StreamSetBuffer * generateSwizzledDeposit(ParabixDriver & pxDriver, StreamSetBuffer * BasisBits, int bufferBlocks) {
     80StreamSetBuffer * generateSwizzledDeposit(CPUDriver & pxDriver, StreamSetBuffer * BasisBits, int bufferBlocks) {
    8281    auto & iBuilder = pxDriver.getBuilder();
    8382
     
    109108}
    110109
    111 StreamSetBuffer * generateBitStreamDeposit(ParabixDriver & pxDriver, StreamSetBuffer * BasisBits, int bufferBlocks) {
     110StreamSetBuffer * generateBitStreamDeposit(CPUDriver & pxDriver, StreamSetBuffer * BasisBits, int bufferBlocks) {
    112111    auto & iBuilder = pxDriver.getBuilder();
    113112
     
    168167    const auto bufferBlocks = codegen::ThreadNum * codegen::SegmentSize;
    169168
    170     ParabixDriver pxDriver("character_deletion");
     169    CPUDriver pxDriver("character_deletion");
    171170    auto & iBuilder = pxDriver.getBuilder();
    172171    Module * M = iBuilder->getModule();
     
    204203
    205204    pxDriver.generatePipelineIR();
    206 
    207     pxDriver.deallocateBuffers();
    208 
    209205    iBuilder->CreateRetVoid();
    210206
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r6047 r6184  
    2929#include <fcntl.h>
    3030#include <mutex>
    31 #include <boost/uuid/sha1.hpp>
    3231#include <editd/editd_cpu_kernel.h>
    33 
    34 #ifdef CUDA_ENABLED
    35 #include <toolchain/NVPTXDriver.h>
    36 #include <editd/editd_gpu_kernel.h>
    37 #include <editd/EditdCudaDriver.h>
    38 #endif
     32#include <kernels/pipeline_builder.h>
     33#include <util/aligned_allocator.h>
    3934
    4035using namespace llvm;
     
    6156using namespace kernel;
    6257using namespace pablo;
    63 using namespace parabix;
    64 
    65 #ifdef CUDA_ENABLED
    66 const static std::string PTXFilename = "editd.ptx";
    67 #endif
    6858
    6959struct matchPosition
     
    171161}
    172162
    173 inline static std::string sha1sum(const std::string & str) {
    174     char buffer[41];    // 40 hex-digits and the terminating null
    175     uint32_t digest[5]; // 160 bits in total
    176     boost::uuids::detail::sha1 sha1;
    177     sha1.process_bytes(str.c_str(), str.size());
    178     sha1.get_digest(digest);
    179     snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
    180              digest[0], digest[1], digest[2], digest[3], digest[4]);
    181     return std::string(buffer);
    182 }
    183 
    184 std::string createName(const std::vector<std::string> & patterns) {
    185     std::string name = "";
    186     for(unsigned i=0; i<patterns.size(); i++)
    187         name += patterns[i];
    188     return name + std::to_string(editDistance);
    189 }
    190 
    191 class PatternKernel final: public pablo::PabloKernel {
    192 public:
    193     PatternKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const std::vector<std::string> & patterns);
    194     std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    195     bool isCachable() const override { return true;}
    196 protected:
    197     void generatePabloMethod() override;
    198 private:
    199     const std::vector<std::string> & mPatterns;
    200 };
    201 
    202 PatternKernel::PatternKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const std::vector<std::string> & patterns)
    203 : PabloKernel(b, sha1sum(createName(patterns)), {{b->getStreamSetTy(4), "pat"}}, {{b->getStreamSetTy(editDistance + 1), "E"}})
    204 , mPatterns(patterns) { 
    205 }
    206 
    207 std::string PatternKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
    208     return getName();
    209 }
    210 
    211 void PatternKernel::generatePabloMethod() {
    212     PabloBuilder entry(getEntryScope());
    213     Var * const pat = getInputStreamVar("pat");
    214     PabloAST * basisBits[4];
    215     basisBits[0] = entry.createExtract(pat, 0, "A");
    216     basisBits[1] = entry.createExtract(pat, 1, "C");
    217     basisBits[2] = entry.createExtract(pat, 2, "T");
    218     basisBits[3] = entry.createExtract(pat, 3, "G");
    219     re::Pattern_Compiler pattern_compiler(*this);
    220     if (optPosition == 0) optPosition = editDistance + 6;
    221     pattern_compiler.compile(mPatterns, entry, basisBits, editDistance, optPosition, stepSize);
    222 }
    223 
    224 std::mutex store_mutex;
    225 extern "C" void wrapped_report_pos(size_t match_pos, int dist) {
    226     struct matchPosition curMatch;
    227     curMatch.pos = match_pos;
    228     curMatch.dist = dist;
    229 
    230     store_mutex.lock();
    231     matchList.push_back(curMatch);
    232     if(ShowPositions)
    233         std::cout << "pos: " << match_pos << ", dist:" << dist << "\n";
    234     store_mutex.unlock();
    235 }
    236 
    237 void editdPipeline(ParabixDriver & pxDriver, const std::vector<std::string> & patterns) {
    238 
    239     auto & idb = pxDriver.getBuilder();
    240     Module * const m = idb->getModule();
    241     Type * const sizeTy = idb->getSizeTy();
    242     Type * const voidTy = idb->getVoidTy();
    243     Type * const inputType = idb->getIntNTy(1)->getPointerTo();
    244 
    245     idb->LinkFunction("wrapped_report_pos", &wrapped_report_pos);
    246 
    247     const unsigned segmentSize = codegen::SegmentSize;
    248     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    249 
    250     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, sizeTy, nullptr));
    251     main->setCallingConv(CallingConv::C);
    252     auto args = main->arg_begin();
    253     Value * const inputStream = &*(args++);
    254     inputStream->setName("input");
    255     Value * const fileSize = &*(args++);
    256     fileSize->setName("fileSize");
    257     idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    258 
    259     auto ChStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(4));
    260     auto mmapK = pxDriver.addKernelInstance<MemorySourceKernel>(idb, 4, 1);
    261     mmapK->setInitialArguments({inputStream, fileSize});
    262     pxDriver.makeKernelCall(mmapK, {}, {ChStream});
    263 
    264     auto MatchResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    265     auto editdk = pxDriver.addKernelInstance<PatternKernel>(idb, patterns);
    266     pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
    267 
    268     auto editdScanK = pxDriver.addKernelInstance<editdScanKernel>(idb, editDistance);
    269     pxDriver.makeKernelCall(editdScanK, {MatchResults}, {});
    270 
    271     pxDriver.generatePipelineIR();
    272     pxDriver.deallocateBuffers();
    273     idb->CreateRetVoid();
    274 
    275     pxDriver.finalizeObject();
    276 }
     163typedef void (*preprocessFunctionType)(char * output_data, size_t output_size, const uint32_t fd);
        // Signature that preprocessPipeline() casts its compiled pipeline to; preprocess() calls it
        // as (buffer, rounded length, file descriptor).
     164
        // File-scope state assigned by preprocess(): the buffer passed to the preprocessing function
        // and the size reported by file_size() for the input file.
     165static char * chStream;
     166static size_t size;
     167
     168//class PreprocessPipeline : public PipelineKernel {
     169//public:
     170//    PreprocessPipeline(EngineInstance & driver, StreamSet * CCResults)
     171//     : PipelineKernel(driver,
     172//    {},
     173//    {Binding{"CCResults", CCResults}},
     174//    {Binding{driver.getBuilder()->getInt32Ty(), "fileDescriptor"}},
     175//    {}) {
     176
     177//    }
     178//};
    277179
    278180class PreprocessKernel final: public pablo::PabloKernel {
    279181public:
    280     PreprocessKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
     182    PreprocessKernel(const std::unique_ptr<KernelBuilder> & b, StreamSet * BasisBits, StreamSet * CCResults);
    281183    bool isCachable() const override { return true; }
    282184    bool hasSignature() const override { return false; }
     
    285187};
    286188
    287 PreprocessKernel::PreprocessKernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    288 : PabloKernel(b, "ccc", {{b->getStreamSetTy(8), "basis"}}, {{b->getStreamSetTy(4), "pat"}}) {
     189PreprocessKernel::PreprocessKernel(const std::unique_ptr<KernelBuilder> & b, StreamSet * BasisBits, StreamSet * CCResults)
        // Constructs the Pablo kernel under the fixed name "editd_preprocess", binding BasisBits to
        // the stream set named "basis" and CCResults to the stream set named "pat".
     190: PabloKernel(b, "editd_preprocess", {{"basis", BasisBits}}, {{"pat", CCResults}}) {
    289191
    290192}
     
    304206}
    305207
    306 void preprocessPipeline(ParabixDriver & pxDriver) {
    307 
    308     auto & iBuilder = pxDriver.getBuilder();
    309     Module * m = iBuilder->getModule();
    310 
    311     Type * const voidTy = iBuilder->getVoidTy();
    312     Type * const int32Ty = iBuilder->getInt32Ty();
    313     Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 4), 0);
    314 
    315     const unsigned segmentSize = codegen::SegmentSize;
    316     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    317 
    318     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, int32Ty, outputType, nullptr));
    319     main->setCallingConv(CallingConv::C);
    320     Function::arg_iterator args = main->arg_begin();
    321 
    322     Value * const fileDescriptor = &*(args++);
    323     fileDescriptor->setName("fileDescriptor");
    324     Value * const outputStream = &*(args++);
    325     outputStream->setName("output");
    326 
    327     iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main));
    328 
    329     auto ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    330 
    331     auto mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
    332     mmapK->setInitialArguments({fileDescriptor});
    333     pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    334 
    335     auto BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    336     auto s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
    337     pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    338 
    339     auto CCResults = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(4), outputStream);
    340     auto ccck = pxDriver.addKernelInstance<PreprocessKernel>(iBuilder);
    341     // NOTE: CCResults are never consumed because they are written directly into an external buffer. This may make analysis difficult.
    342     pxDriver.makeKernelCall(ccck, {BasisBits}, {CCResults});
    343 
    344     pxDriver.generatePipelineIR();
    345     pxDriver.deallocateBuffers();
    346     iBuilder->CreateRetVoid();
    347 
    348     pxDriver.finalizeObject();
    349 }
    350 
    351 void multiEditdPipeline(ParabixDriver & pxDriver) {
    352     auto & idb = pxDriver.getBuilder();
    353     Module * const m = idb->getModule();
    354     Type * const voidTy = idb->getVoidTy();
    355     Type * const int32Ty = idb->getInt32Ty();
    356 
    357     idb->LinkFunction("wrapped_report_pos", &wrapped_report_pos);
    358 
    359     const unsigned segmentSize = codegen::SegmentSize;
    360     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    361 
    362     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, int32Ty, nullptr));
    363     main->setCallingConv(CallingConv::C);
    364     Function::arg_iterator args = main->arg_begin();
    365 
    366     Value * const fileDescriptor = &*(args++);
    367     fileDescriptor->setName("fileDescriptor");
    368 
    369     idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    370 
    371     auto ByteStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 8));
    372 
    373     auto mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(idb);
    374     mmapK->setInitialArguments({fileDescriptor});
    375     pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    376 
    377     auto ChStream = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), segmentSize * bufferSegments);
    378     auto ccck = pxDriver.addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "ccc",
    379         std::vector<re::CC *>{re::makeCC(re::makeCC(0x41), re::makeCC(0x61)),
    380                               re::makeCC(re::makeCC(0x43), re::makeCC(0x63)),
    381                               re::makeCC(re::makeCC(0x54), re::makeCC(0x74)),
    382                               re::makeCC(re::makeCC(0x47), re::makeCC(0x67))});
    383     pxDriver.makeKernelCall(ccck, {ByteStream}, {ChStream});
    384 
    385     const auto n = pattGroups.size();
    386    
    387     std::vector<StreamSetBuffer *> MatchResultsBufs(n);
    388    
    389     for(unsigned i = 0; i < n; ++i){
    390         auto MatchResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    391         auto editdk = pxDriver.addKernelInstance<PatternKernel>(idb, pattGroups[i]);
    392         pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
    393         MatchResultsBufs[i] = MatchResults;
    394     }
    395     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    396     if (n > 1) {
    397         MergedResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    398         kernel::Kernel * streamsMergeK = pxDriver.addKernelInstance<kernel::StreamsMerge>(idb, editDistance + 1, n);
    399         pxDriver.makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
    400     }
    401 
    402     auto editdScanK = pxDriver.addKernelInstance<editdScanKernel>(idb, editDistance);
    403     pxDriver.makeKernelCall(editdScanK, {MergedResults}, {});
    404 
    405     pxDriver.generatePipelineIR();
    406     pxDriver.deallocateBuffers();
    407     idb->CreateRetVoid();
    408 
    409     pxDriver.finalizeObject();
    410 }
    411 
    412 
    413 void editdIndexPatternPipeline(ParabixDriver & pxDriver, unsigned patternLen) {
    414 
    415     auto & idb = pxDriver.getBuilder();
    416     Module * const m = idb->getModule();
    417     Type * const sizeTy = idb->getSizeTy();
    418     Type * const voidTy = idb->getVoidTy();
    419     Type * const inputType = idb->getIntNTy(1)->getPointerTo();
    420     Type * const patternPtrTy = PointerType::get(idb->getInt8Ty(), 0);
    421 
    422     idb->LinkFunction("wrapped_report_pos", &wrapped_report_pos);
    423 
    424     const unsigned segmentSize = codegen::SegmentSize;
    425     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    426 
    427     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, sizeTy, patternPtrTy, nullptr));
    428     main->setCallingConv(CallingConv::C);
    429     auto args = main->arg_begin();
    430     Value * const inputStream = &*(args++);
    431     inputStream->setName("input");
    432     Value * const fileSize = &*(args++);
    433     fileSize->setName("fileSize");
    434     Value * const pattStream = &*(args++);
    435     pattStream->setName("pattStream");
    436     idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    437 
    438     auto ChStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(4));
    439     auto mmapK = pxDriver.addKernelInstance<MemorySourceKernel>(idb, 4, 1);
    440     mmapK->setInitialArguments({inputStream, fileSize});
    441     pxDriver.makeKernelCall(mmapK, {}, {ChStream});
    442 
    443     auto MatchResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    444     auto editdk = pxDriver.addKernelInstance<kernel::editdCPUKernel>(idb, editDistance, patternLen, groupSize);
    445 
    446     const unsigned numOfCarries = patternLen * (editDistance + 1) * 4 * groupSize;
    447     Type * strideCarryTy = ArrayType::get(idb->getBitBlockType(), numOfCarries);
    448     Value * strideCarry = idb->CreateAlloca(strideCarryTy);
    449     idb->CreateStore(Constant::getNullValue(strideCarryTy), strideCarry);
    450 
    451     editdk->setInitialArguments({pattStream, strideCarry});
    452     pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
    453 
    454     auto editdScanK = pxDriver.addKernelInstance<editdScanKernel>(idb, editDistance);
    455     pxDriver.makeKernelCall(editdScanK, {MatchResults}, {});
    456 
    457     pxDriver.generatePipelineIR();
    458 
    459     idb->CreateRetVoid();
    460 
    461     pxDriver.finalizeObject();
    462 }
    463 
    464 typedef void (*preprocessFunctionType)(const int fd, char * output_data);
    465 
    466 typedef void (*editdFunctionType)(char * byte_data, size_t filesize);
    467 
    468 typedef void (*multiEditdFunctionType)(const int fd);
    469 
    470 typedef void (*editdIndexFunctionType)(char * byte_data, size_t filesize, const char * pattern);
    471 
    472 static char * chStream;
    473 static size_t size;
     208preprocessFunctionType preprocessPipeline(CPUDriver & pxDriver) {
            // Assembles the preprocessing pipeline on pxDriver and returns its compiled entry point,
            // cast to preprocessFunctionType.
            // CCResults is a 4-element stream set created on the driver and bound as "CCResults".
     209    StreamSet * const CCResults = pxDriver.CreateStreamSet(4);
     210    auto & b = pxDriver.getBuilder();
     211    Type * const int32Ty = b->getInt32Ty();
            // NOTE(review): the three argument lists are presumably (stream inputs, stream outputs,
            // scalar inputs) — confirm against makePipelineWithIO's declaration.
     212    auto P = pxDriver.makePipelineWithIO({}, {{"CCResults", CCResults}}, {{int32Ty, "fileDescriptor"}});
     213    Scalar * const fileDescriptor = P->getInputScalar("fileDescriptor");
            // Kernel chain: MMapSourceKernel(fileDescriptor, ByteStream) -> S2PKernel(ByteStream, BasisBits)
            // -> PreprocessKernel(BasisBits, CCResults).
     214    StreamSet * const ByteStream = P->CreateStreamSet(1, 8);
     215    P->CreateKernelCall<MMapSourceKernel>(fileDescriptor, ByteStream);
     216    StreamSet * const BasisBits = P->CreateStreamSet(8);
     217    P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits);
     218    P->CreateKernelCall<PreprocessKernel>(BasisBits, CCResults);
     219    return reinterpret_cast<preprocessFunctionType>(P->compile());
     220}
    474221
    475222size_t file_size(const int fd) {
     
    481228}
    482229
    483 char * preprocess(preprocessFunctionType fn_ptr) {
     230#define ALIGNMENT (32UL)
     231
     232inline bool is_power_2(const unsigned n) {
            // True when n has exactly one bit set: n & (n - 1) clears the lowest set bit, and the
            // trailing "&& n" rejects zero.
     233    return ((n & (n - 1)) == 0) && n;
     234}
     235
     236inline unsigned round_up_to(const unsigned x, const unsigned y) {
     237    assert(is_power_2(y));
     238    return (x + y - 1) & -y;
     239}
     240
     241char * preprocess(preprocessFunctionType preprocess) {
            // Opens inputFiles[0], allocates the file-scope chStream buffer and runs the supplied
            // preprocessing function over the file; returns chStream.
            // The parameter is also named "preprocess", so the call further down invokes the
            // function pointer rather than recursing.
    484242    std::string fileName = inputFiles[0];
    485243    const int fd = open(inputFiles[0].c_str(), O_RDONLY);
    486244    if (LLVM_UNLIKELY(fd == -1)) {
    487         std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
     245        std::cerr << "Error: cannot open " << fileName << " for processing.\n";
    488246        exit(-1);
    489247    }
    490248    size = file_size(fd);
    491     int ret = posix_memalign((void**)&chStream, 32, size);
    492     if (ret) {
    493         std::cerr << "Cannot allocate memory for output.\n";
    494         exit(-2);
    495     }
    496     fn_ptr(fd, chStream);
     249
     250    // Given an 8-bit bytestream of length n, we need space for 4 bitstreams of length n ...
            // n is the file size rounded up to a multiple of 8 * ALIGNMENT; (4 * n) / 8 is the byte
            // count for 4 one-bit streams of n positions.
     251    AlignedAllocator<char, ALIGNMENT> alloc;
     252    const auto n = round_up_to(size, 8 * ALIGNMENT);
     253    chStream = alloc.allocate((4 * n) / 8);
     254    preprocess(chStream, n, fd);
    497255    close(fd);
    498256    return chStream;
    499257}
    500258
    501 void editd(editdFunctionType fn_ptr, char * inputStream, size_t size) {
    502     fn_ptr(inputStream, size);
     259
     260std::string createName(const std::vector<std::string> & patterns) {
     261    std::string name = "";
     262    for(unsigned i=0; i<patterns.size(); i++)
     263        name += patterns[i];
     264    return name + std::to_string(editDistance);
     265}
     266
     267class PatternKernel final: public pablo::PabloKernel {
        // Pablo kernel that compiles a group of patterns via re::Pattern_Compiler, reading the
        // stream set "pat" and bound to the stream set "E" (see the constructor).
     268public:
     269    PatternKernel(const std::unique_ptr<KernelBuilder> & b, const std::vector<std::string> & patterns, StreamSet * pat, StreamSet * E);
            // Returns the kernel name, which is derived from the patterns and editDistance.
     270    std::string makeSignature(const std::unique_ptr<KernelBuilder> &) override;
     271    bool isCachable() const override { return true;}
     272protected:
     273    void generatePabloMethod() override;
     274private:
            // Held by reference, not copied: the vector passed to the constructor must remain
            // alive for as long as this kernel uses it.
     275    const std::vector<std::string> & mPatterns;
     276};
     277
     278PatternKernel::PatternKernel(const std::unique_ptr<KernelBuilder> & b, const std::vector<std::string> & patterns, StreamSet * pat, StreamSet * E)
        // The kernel name is getStringHash() of createName(patterns); "pat" and "E" are the names
        // under which the two stream sets are bound. mPatterns stores a reference to patterns.
     279: PabloKernel(b, getStringHash(createName(patterns)),
     280{{"pat", pat}},
     281{{"E", E}})
     282, mPatterns(patterns) {
     283
     284}
     285
     286std::string PatternKernel::makeSignature(const std::unique_ptr<KernelBuilder> &) {
            // The signature is simply the kernel name; the builder argument is unused.
     287    return getName();
     288}
     289
     290void PatternKernel::generatePabloMethod() {
            // Extracts elements 0..3 of the input stream variable "pat", labelled "A", "C", "T" and
            // "G" in that order, and passes them to re::Pattern_Compiler together with mPatterns.
     291    PabloBuilder entry(getEntryScope());
     292    Var * const pat = getInputStreamVar("pat");
     293    PabloAST * basisBits[4];
     294    basisBits[0] = entry.createExtract(pat, 0, "A");
     295    basisBits[1] = entry.createExtract(pat, 1, "C");
     296    basisBits[2] = entry.createExtract(pat, 2, "T");
     297    basisBits[3] = entry.createExtract(pat, 3, "G");
     298    re::Pattern_Compiler pattern_compiler(*this);
            // Side effect: optPosition (declared outside this block) is overwritten with
            // editDistance + 6 whenever it is 0.
     299    if (optPosition == 0) optPosition = editDistance + 6;
     300    pattern_compiler.compile(mPatterns, entry, basisBits, editDistance, optPosition, stepSize);
     301}
     302
     303std::mutex store_mutex;
     304extern "C" void wrapped_report_pos(size_t match_pos, int dist) {
     305    struct matchPosition curMatch;
     306    curMatch.pos = match_pos;
     307    curMatch.dist = dist;
     308
     309    store_mutex.lock();
     310    matchList.push_back(curMatch);
     311    if(ShowPositions)
     312        std::cout << "pos: " << match_pos << ", dist:" << dist << "\n";
     313    store_mutex.unlock();
     314}
     315
     316typedef void (*editdFunctionType)(char * byte_data, size_t filesize);
     317
     318editdFunctionType editdPipeline(CPUDriver & pxDriver, const std::vector<std::string> & patterns) {
            // Assembles the single-group edit-distance pipeline for patterns and returns its
            // compiled entry point, cast to editdFunctionType.
     319    auto & b = pxDriver.getBuilder();
     320    Type * const sizeTy = b->getSizeTy();
     321    Type * const inputType = b->getIntNTy(1)->getPointerTo();
            // The pipeline takes two scalars: "input" (pointer to 1-bit integer) and "fileSize".
     322    auto P = pxDriver.makePipeline({Binding{inputType, "input"}, Binding{sizeTy, "fileSize"}});
     323    Scalar * const inputStream = P->getInputScalar("input");
     324    Scalar * const fileSize = P->getInputScalar("fileSize");
            // Registers the C callback wrapped_report_pos under the same name.
     325    b->LinkFunction("wrapped_report_pos", wrapped_report_pos);
            // Kernel chain: MemorySourceKernel(input, fileSize, ChStream)
            // -> PatternKernel(patterns, ChStream, MatchResults) -> editdScanKernel(MatchResults).
     326    StreamSet * const ChStream = P->CreateStreamSet(4);
     327    P->CreateKernelCall<MemorySourceKernel>(inputStream, fileSize, ChStream);
     328    StreamSet * const MatchResults = P->CreateStreamSet(editDistance + 1);
     329    P->CreateKernelCall<PatternKernel>(patterns, ChStream, MatchResults);
     330    P->CreateKernelCall<editdScanKernel>(MatchResults);
     331    return reinterpret_cast<editdFunctionType>(P->compile());
     332}
     333
     334typedef void (*multiEditdFunctionType)(const int fd);
     335
     336multiEditdFunctionType multiEditdPipeline(CPUDriver & pxDriver) {
     337
     338    auto & b = pxDriver.getBuilder();
     339    auto P = pxDriver.makePipeline({Binding{b->getInt32Ty(), "fileDescriptor"}});
     340    b->LinkFunction("wrapped_report_pos", wrapped_report_pos);
     341    Scalar * const fileDescriptor = P->getInputScalar("fileDescriptor");
     342
     343    StreamSet * const ByteStream = P->CreateStreamSet(1, 8);
     344    P->CreateKernelCall<MMapSourceKernel>(fileDescriptor, ByteStream);
     345
     346    std::vector<re::CC *> ccs;
     347    ccs.emplace_back(re::makeCC(re::makeCC(0x41), re::makeCC(0x61)));
     348    ccs.emplace_back(re::makeCC(re::makeCC(0x43), re::makeCC(0x63)));
     349    ccs.emplace_back(re::makeCC(re::makeCC(0x47), re::makeCC(0x67)));
     350    ccs.emplace_back(re::makeCC(re::makeCC(0x54), re::makeCC(0x74)));
     351
     352    StreamSet * const ChStream = P->CreateStreamSet(4);
     353    P->CreateKernelCall<DirectCharacterClassKernelBuilder>("editd_cc", ccs, ByteStream, ChStream);
     354
     355    const auto n = pattGroups.size();
     356    std::vector<StreamSet *> MatchResults(n);
     357    for(unsigned i = 0; i < n; ++i){
     358        MatchResults[i] = P->CreateStreamSet(editDistance + 1);
     359        P->CreateKernelCall<PatternKernel>(pattGroups[i], ChStream, MatchResults[i]);
     360    }
     361
     362    StreamSet * MergedResults = MatchResults[0];
     363    if (n > 1) {
     364        StreamSet * const MergedResults = P->CreateStreamSet();
     365        P->CreateKernelCall<StreamsMerge>(MatchResults, MergedResults);
     366    }
     367    P->CreateKernelCall<editdScanKernel>(MergedResults);
     368
     369    return reinterpret_cast<multiEditdFunctionType>(P->compile());
     370}
     371
     372typedef void (*editdIndexFunctionType)(char * byte_data, size_t filesize, const char * pattern);
     373
     374editdIndexFunctionType editdIndexPatternPipeline(CPUDriver & pxDriver, unsigned patternLen) {
            // Assembles the pipeline built around editdCPUKernel, which receives the pattern as a
            // runtime scalar ("pattStream"), and returns its compiled entry point cast to
            // editdIndexFunctionType.
     375
     376    auto & b = pxDriver.getBuilder();
     377
     378    Type * const inputType = b->getIntNTy(1)->getPointerTo();
     379    Type * const sizeTy = b->getSizeTy();
     380    Type * const patternPtrTy = PointerType::get(b->getInt8Ty(), 0);
     381
            // Three scalars: "input", "fileSize" and "pattStream" (an i8 pointer).
     382    auto P = pxDriver.makePipeline({Binding{inputType, "input"}, Binding{sizeTy, "fileSize"}, Binding{patternPtrTy, "pattStream"}});
     383    Scalar * const inputStream = P->getInputScalar("input");
     384    Scalar * const fileSize = P->getInputScalar("fileSize");
     385    Scalar * const pattStream = P->getInputScalar("pattStream");
     386
            // Registers the C callback wrapped_report_pos under the same name.
     387    b->LinkFunction("wrapped_report_pos", wrapped_report_pos);
     388
     389    StreamSet * const ChStream = P->CreateStreamSet(4);
     390    P->CreateKernelCall<MemorySourceKernel>(inputStream, fileSize, ChStream);
     391
     392    StreamSet * const MatchResults = P->CreateStreamSet(editDistance + 1);
     393
     394    P->CreateKernelCall<editdCPUKernel>(patternLen, groupSize, pattStream, ChStream, MatchResults);
     395
     396    P->CreateKernelCall<editdScanKernel>(MatchResults);
     397
     398    return reinterpret_cast<editdIndexFunctionType>(P->compile());
    503399}
    504400
     
    515411    while (groupIdx < pattGroups.size()){
    516412
    517         ParabixDriver pxDriver("editd");
    518         editdPipeline(pxDriver, pattGroups[groupIdx]);
    519         auto editd_ptr = reinterpret_cast<editdFunctionType>(pxDriver.getMain());
    520         editd(editd_ptr, chStream, size);
     413        CPUDriver pxDriver("editd");
     414        auto editd = editdPipeline(pxDriver, pattGroups[groupIdx]);
     415        editd(chStream, size);
    521416
    522417        count_mutex.lock();
     
    529424}
    530425
    531 #ifdef CUDA_ENABLED
    532 void editdGPUCodeGen(unsigned patternLen){
    533     NVPTXDriver pxDriver("editd");
    534     auto & iBuilder = pxDriver.getBuilder();
    535     Module * M = iBuilder->getModule();
    536 
    537     const unsigned segmentSize = codegen::SegmentSize;
    538 
    539     Type * const mBitBlockType = iBuilder->getBitBlockType();
    540     Type * const inputSizeTy = PointerType::get(iBuilder->getSizeTy(), 1);
    541     Type * const int32ty = iBuilder->getInt32Ty();
    542     Type * const voidTy = Type::getVoidTy(M->getContext());
    543     Type * const inputTy = PointerType::get(ArrayType::get(mBitBlockType, 4), 1);
    544     Type * const patternPtrTy = PointerType::get(iBuilder->getInt8Ty(), 1);
    545     Type * const outputTy = PointerType::get(ArrayType::get(mBitBlockType, editDistance+1), 1);
    546     Type * const stridesTy = PointerType::get(int32ty, 1);
    547 
    548     Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputTy, inputSizeTy, patternPtrTy, outputTy, stridesTy, nullptr));
    549     main->setCallingConv(CallingConv::C);
    550     auto args = main->arg_begin();
    551 
    552     Value * const inputStream = &*(args++);
    553     inputStream->setName("input");
    554     Value * const inputSizePtr = &*(args++);
    555     inputSizePtr->setName("inputSizePtr");
    556     Value * const pattStream = &*(args++);
    557     pattStream->setName("pattStream");
    558     Value * const resultStream = &*(args++);
    559     resultStream->setName("resultStream");
    560     Value * const stridesPtr = &*(args++);
    561     stridesPtr->setName("stridesPtr");
    562 
    563     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main,0));
    564 
    565     Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
    566     Value * tid = iBuilder->CreateCall(tidFunc);
    567     Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
    568     Value * bid = iBuilder->CreateCall(bidFunc);
    569 
    570     Value * inputThreadPtr = iBuilder->CreateGEP(inputStream, tid);
    571     Value * strides = iBuilder->CreateLoad(stridesPtr);
    572     Value * outputBlocks = iBuilder->CreateMul(strides, ConstantInt::get(int32ty, iBuilder->getStride() / iBuilder->getBitBlockWidth()));
    573     Value * resultStreamPtr = iBuilder->CreateGEP(resultStream, iBuilder->CreateAdd(iBuilder->CreateMul(bid, outputBlocks), tid));
    574     Value * inputSize = iBuilder->CreateLoad(inputSizePtr);
    575 
    576     auto CCStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(4), 1);
    577     auto sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(iBuilder, inputTy, segmentSize);
    578     sourceK->setInitialArguments({inputThreadPtr, inputSize});
    579     pxDriver.makeKernelCall(sourceK, {}, {CCStream});
    580 
    581     auto ResultStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(editDistance+1), resultStreamPtr, 1);
    582     auto editdk = pxDriver.addKernelInstance<kernel::editdGPUKernel>(iBuilder, editDistance, patternLen, groupSize);
    583      
    584     const unsigned numOfCarries = patternLen * (editDistance + 1) * 4 * groupSize;
    585     Type * strideCarryTy = ArrayType::get(mBitBlockType, numOfCarries);
    586     Value * strideCarry = iBuilder->CreateAlloca(strideCarryTy);
    587     iBuilder->CreateStore(Constant::getNullValue(strideCarryTy), strideCarry);
    588 
    589     editdk->setInitialArguments({pattStream, strideCarry});
    590     pxDriver.makeKernelCall(editdk, {CCStream}, {ResultStream});
    591 
    592     pxDriver.generatePipelineIR();
    593     pxDriver.deallocateBuffers();
    594     iBuilder->CreateRetVoid();
    595 
    596     pxDriver.finalizeObject();
    597 
    598 }
    599 
    600 void mergeGPUCodeGen(){
    601     NVPTXDriver pxDriver("merge");
    602     auto & iBuilder = pxDriver.getBuilder();
    603     Module * M = iBuilder->getModule();
    604 
    605     Type * const mBitBlockType = iBuilder->getBitBlockType();
    606     Type * const int32ty = iBuilder->getInt32Ty();
    607     Type * const voidTy = Type::getVoidTy(M->getContext());
    608     Type * const resultTy = PointerType::get(ArrayType::get(mBitBlockType, editDistance+1), 1);
    609     Type * const stridesTy = PointerType::get(int32ty, 1);
    610 
    611     Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, resultTy, stridesTy, nullptr));
    612     main->setCallingConv(CallingConv::C);
    613     Function::arg_iterator args = main->arg_begin();
    614 
    615     Value * const resultStream = &*(args++);
    616     resultStream->setName("resultStream");
    617     Value * const stridesPtr = &*(args++);
    618     stridesPtr->setName("stridesPtr");
    619 
    620     BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entryBlock", main, 0);
    621     BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", main, 0);
    622     BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", main, 0);
    623     BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", main, 0);
    624 
    625     iBuilder->SetInsertPoint(entryBlock);
    626 
    627     Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
    628     Value * tid = iBuilder->CreateCall(tidFunc);
    629     Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
    630     Value * bid = iBuilder->CreateCall(bidFunc);
    631 
    632     Value * strides = iBuilder->CreateLoad(stridesPtr);
    633     Value * strideBlocks = ConstantInt::get(int32ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
    634     Value * outputBlocks = iBuilder->CreateMul(strides, strideBlocks);
    635     Value * resultStreamPtr = iBuilder->CreateGEP(resultStream, tid);
    636 
    637     iBuilder->CreateBr(strideLoopCond);
    638     iBuilder->SetInsertPoint(strideLoopCond);
    639     PHINode * strideNo = iBuilder->CreatePHI(int32ty, 2, "strideNo");
    640     strideNo->addIncoming(ConstantInt::get(int32ty, 0), entryBlock);
    641     Value * notDone = iBuilder->CreateICmpULT(strideNo, strides);
    642     iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
    643 
    644     iBuilder->SetInsertPoint(strideLoopBody);
    645     Value * myResultStreamPtr = iBuilder->CreateGEP(resultStreamPtr, {iBuilder->CreateMul(strideBlocks, strideNo)});
    646     Value * myResultStream = iBuilder->CreateLoad(iBuilder->CreateGEP(myResultStreamPtr, {iBuilder->getInt32(0), bid}));
    647     for (int i=1; i<codegen::GroupNum; i++){
    648         Value * nextStreamPtr = iBuilder->CreateGEP(myResultStreamPtr, {iBuilder->CreateMul(outputBlocks, iBuilder->getInt32(i)), bid});
    649         myResultStream = iBuilder->CreateOr(myResultStream, iBuilder->CreateLoad(nextStreamPtr));
    650     }
    651     iBuilder->CreateStore(myResultStream, iBuilder->CreateGEP(myResultStreamPtr, {iBuilder->getInt32(0), bid}));
    652     strideNo->addIncoming(iBuilder->CreateAdd(strideNo, ConstantInt::get(int32ty, 1)), strideLoopBody);
    653     iBuilder->CreateBr(strideLoopCond);
    654 
    655     iBuilder->SetInsertPoint(stridesDone);
    656     iBuilder->CreateRetVoid();
    657 
    658     pxDriver.finalizeObject();
    659 
    660 }
    661 #endif
    662 
    663 editdFunctionType editdScanCPUCodeGen(ParabixDriver & pxDriver) {
    664    
    665     auto & iBuilder = pxDriver.getBuilder();
    666     Module * M = iBuilder->getModule();
    667 
    668     Type * mBitBlockType = iBuilder->getBitBlockType();
    669     Type * const size_ty = iBuilder->getSizeTy();
    670     Type * const voidTy = Type::getVoidTy(M->getContext());
    671     Type * const inputType = PointerType::get(ArrayType::get(mBitBlockType, editDistance+1), 0);
    672 
    673     Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, size_ty, nullptr));
    674     main->setCallingConv(CallingConv::C);
    675     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
    676     Function::arg_iterator args = main->arg_begin();
    677     Value * const inputStream = &*(args++);
    678     inputStream->setName("input");
    679     Value * const fileSize = &*(args++);
    680     fileSize->setName("fileSize");
    681 
    682     StreamSetBuffer * MatchResults = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(editDistance + 1));
    683     kernel::Kernel * sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(iBuilder, editDistance + 1, 8);
    684     sourceK->setInitialArguments({inputStream, fileSize});
    685     pxDriver.makeKernelCall(sourceK, {}, {MatchResults});
    686 
    687     auto editdScanK = pxDriver.addKernelInstance<editdScanKernel>(iBuilder, editDistance);
    688     pxDriver.makeKernelCall(editdScanK, {MatchResults}, {});
    689     pxDriver.LinkFunction(*editdScanK, "wrapped_report_pos", &wrapped_report_pos);
    690     pxDriver.generatePipelineIR();
    691     pxDriver.deallocateBuffers();
    692     iBuilder->CreateRetVoid();
    693 
    694     pxDriver.finalizeObject();
    695 
    696     return reinterpret_cast<editdFunctionType>(pxDriver.getMain());
    697 
    698 }
    699 
    700426int main(int argc, char *argv[]) {
    701427    codegen::ParseCommandLineOptions(argc, argv);
     
    706432
    707433    if (MultiEditdKernels) {
    708         ParabixDriver pxDriver("editd");
    709         multiEditdPipeline(pxDriver);
    710         auto editd_ptr = reinterpret_cast<multiEditdFunctionType>(pxDriver.getMain());
     434        CPUDriver pxDriver("editd");
     435        auto editd = multiEditdPipeline(pxDriver);
    711436
    712437        std::string fileName = inputFiles[0];
     
    716441            exit(-1);
    717442        }
    718         editd_ptr(fd);
     443        editd(fd);
    719444        close(fd);
    720445        run_second_filter(pattern_segs, total_len, 0.15);
     
    722447    }
    723448
    724 #ifdef CUDA_ENABLED
    725     if (codegen::NVPTX)
    726         codegen::BlockSize = 64;
    727 #endif
    728 
    729     ParabixDriver pxDriver("preprocess");
    730     preprocessPipeline(pxDriver);
    731     auto preprocess_ptr = reinterpret_cast<preprocessFunctionType>(pxDriver.getMain());
     449    CPUDriver pxDriver("preprocess");
     450    auto preprocess_ptr = preprocessPipeline(pxDriver);
    732451    preprocess(preprocess_ptr);
    733452
    734 #ifdef CUDA_ENABLED
    735     if(codegen::NVPTX){
    736 
    737         std::ifstream t(PatternFilename);
    738         if (!t.is_open()) {
    739             std::cerr << "Error: cannot open " << PatternFilename << " for processing. Skipped.\n";
    740             exit(-1);
    741         }
    742         std::string patterns((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
    743 
    744         editdGPUCodeGen(pattVector[0].length());
    745         mergeGPUCodeGen();
    746         ulong * rslt = RunPTX(PTXFilename, chStream, size, patterns.c_str(), patterns.length(), editDistance);
    747 
    748         ParabixDriver scanDriver("scan");
    749         editdFunctionType editd_ptr = editdScanCPUCodeGen(scanDriver);
    750         editd(editd_ptr, (char*)rslt, size);
    751 
    752         run_second_filter(pattern_segs, total_len, 0.15);
    753 
    754         return 0;
    755     }
    756 #endif
    757 
    758453    if(pattVector.size() == 1){
    759454
    760         ParabixDriver pxDriver("editd");
    761         editdPipeline(pxDriver, pattVector);
    762         auto editd_ptr = reinterpret_cast<editdFunctionType>(pxDriver.getMain());
    763         editd(editd_ptr, chStream, size);
     455        CPUDriver pxDriver("editd");
     456        auto editd = editdPipeline(pxDriver, pattVector);
     457        editd(chStream, size);
    764458        std::cout << "total matches is " << matchList.size() << std::endl;
    765459    }
    766460    else{
    767         if (Threads == 1) { 
     461        if (Threads == 1) {
    768462            if (EditdIndexPatternKernels) {
    769                 ParabixDriver pxDriver("editd");
    770                 editdIndexPatternPipeline(pxDriver, pattVector[0].length());
    771                 auto editd_ptr = reinterpret_cast<editdIndexFunctionType>(pxDriver.getMain());
     463                CPUDriver pxDriver("editd");
     464                auto editd_ptr = editdIndexPatternPipeline(pxDriver, pattVector[0].length());
    772465
    773466                for(unsigned i=0; i<pattVector.size(); i+=groupSize){
     
    782475                for(unsigned i=0; i<pattGroups.size(); i++){
    783476
    784                     ParabixDriver pxDriver("editd");
    785                     editdPipeline(pxDriver, pattGroups[i]);
    786                     auto editd_ptr = reinterpret_cast<editdFunctionType>(pxDriver.getMain());
    787                     editd(editd_ptr, chStream, size);
     477                    CPUDriver pxDriver("editd");
     478                    auto editd = editdPipeline(pxDriver, pattGroups[i]);
     479                    editd(chStream, size);
    788480                }
    789481            }
     
    813505    }
    814506
     507    AlignedAllocator<char, 32> alloc;
     508    alloc.deallocate(chStream, 0);
     509
    815510    return 0;
    816511}
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.cpp

    r5985 r6184  
    9595}
    9696
    97 editdCPUKernel::editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen, unsigned groupSize) :
    98 BlockOrientedKernel("editd_cpu",
    99              {Binding{b->getStreamSetTy(4), "CCStream"}},
    100              {Binding{b->getStreamSetTy(dist + 1), "ResultStream"}},
    101              {Binding{PointerType::get(b->getInt8Ty(), 1), "pattStream"},
    102              Binding{PointerType::get(ArrayType::get(b->getBitBlockType(), pattLen * (dist + 1) * 4 * groupSize), 0), "strideCarry"}},
    103              {},
    104              {Binding{b->getBitBlockType(), "EOFmask"}}),
    105 mEditDistance(dist),
    106 mPatternLen(pattLen),
    107 mGroupSize(groupSize){
     97editdCPUKernel::editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b,
     98                               const unsigned patternLen, const unsigned groupSize,
     99                               Scalar * const pattStream,
     100                               StreamSet * const CCStream, StreamSet * const ResultStream)
     101: BlockOrientedKernel("editd_cpu" + std::to_string(patternLen) + "x" + std::to_string(groupSize),
     102// input stream
     103{Binding{"CCStream", CCStream}},
     104// output stream
     105{Binding{"ResultStream", ResultStream}},
     106// input scalar
     107{Binding{"pattStream", pattStream}},
     108// output scalar
     109{},
     110// internal scalars
     111{Binding{b->getBitBlockType(), "EOFmask"},
     112 Binding{ArrayType::get(b->getBitBlockType(), (patternLen * groupSize * 4 * ResultStream->getNumElements())), "strideCarry"}}) {
     113
    108114}
    109115
    110116}
    111 
    112 
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.h

    r5603 r6184  
    1717public:
    1818
    19     editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen, unsigned groupSize);
     19    editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b,
     20                   const unsigned patternLen, const unsigned groupSize,
     21                   Scalar * const pattStream,
     22                   StreamSet * const CCStream, StreamSet * const ResultStream);
    2023   
    2124
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.cpp

    r5440 r6184  
    1313namespace kernel {
    1414
    15 void editdScanKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    16     auto savePoint = idb->saveIP();
    17     Function * scanWordFunction = generateScanWordRoutine(idb);
    18     idb->restoreIP(savePoint);
     15void editdScanKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) {
     16    auto savePoint = b->saveIP();
     17    Function * scanWordFunction = generateScanWordRoutine(b);
     18    b->restoreIP(savePoint);
    1919
    20     const unsigned fieldCount = idb->getBitBlockWidth() / mScanwordBitWidth;
    21     Type * T = idb->getIntNTy(mScanwordBitWidth);
     20    const unsigned fieldCount = b->getBitBlockWidth() / mScanwordBitWidth;
     21    Type * T = b->getIntNTy(mScanwordBitWidth);
    2222    VectorType * scanwordVectorType =  VectorType::get(T, fieldCount);
    23     Value * blockNo = idb->getScalarField("BlockNo");
    24     Value * scanwordPos = idb->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), idb->getBitBlockWidth()));
     23    Value * blockNo = b->getScalarField("BlockNo");
     24    Value * scanwordPos = b->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), b->getBitBlockWidth()));
    2525   
    2626    std::vector<Value * > matchWordVectors;
    27     for(unsigned d = 0; d <= mEditDistance; d++) {
    28         Value * matches = idb->loadInputStreamBlock("matchResults", idb->getInt32(d));
    29         matchWordVectors.push_back(idb->CreateBitCast(matches, scanwordVectorType));
     27    for(unsigned d = 0; d < mNumElements; d++) {
     28        Value * matches = b->loadInputStreamBlock("matchResults", b->getInt32(d));
     29        matchWordVectors.push_back(b->CreateBitCast(matches, scanwordVectorType));
    3030    }
    3131   
    3232    for(unsigned i = 0; i < fieldCount; ++i) {
    33         for(unsigned d = 0; d <= mEditDistance; d++) {
    34             Value * matchWord = idb->CreateExtractElement(matchWordVectors[d], ConstantInt::get(T, i));
    35             idb->CreateCall(scanWordFunction, {matchWord, idb->getInt32(d), scanwordPos});
     33        for(unsigned d = 0; d < mNumElements; d++) {
     34            Value * matchWord = b->CreateExtractElement(matchWordVectors[d], ConstantInt::get(T, i));
     35            b->CreateCall(scanWordFunction, {matchWord, b->getInt32(d), scanwordPos});
    3636        }
    37         scanwordPos = idb->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
     37        scanwordPos = b->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
    3838    }
    3939
    40     idb->setScalarField("BlockNo", idb->CreateAdd(blockNo, idb->getSize(1)));
     40    b->setScalarField("BlockNo", b->CreateAdd(blockNo, b->getSize(1)));
    4141}
    4242
    43 Function * editdScanKernel::generateScanWordRoutine(const std::unique_ptr<KernelBuilder> &iBuilder) const {
     43Function * editdScanKernel::generateScanWordRoutine(const std::unique_ptr<KernelBuilder> &b) const {
    4444
    45     IntegerType * T = iBuilder->getIntNTy(mScanwordBitWidth);
    46     Module * const m = iBuilder->getModule();
     45    IntegerType * T = b->getIntNTy(mScanwordBitWidth);
     46    Module * const m = b->getModule();
    4747
    48     Function * scanFunc = cast<Function>(m->getOrInsertFunction("scan_word", iBuilder->getVoidTy(), T, iBuilder->getInt32Ty(), T, nullptr));
     48    Function * scanFunc = cast<Function>(m->getOrInsertFunction("scan_word", b->getVoidTy(), T, b->getInt32Ty(), T, nullptr));
    4949    scanFunc->setCallingConv(CallingConv::C);
    5050    Function::arg_iterator args = scanFunc->arg_begin();
     
    5757    basePos->setName("basePos");
    5858
    59     Constant * matchProcessor = m->getOrInsertFunction("wrapped_report_pos", iBuilder->getVoidTy(), T, iBuilder->getInt32Ty(), nullptr);
    60     BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", scanFunc, 0);
    61     BasicBlock * matchesCondBlock = BasicBlock::Create(iBuilder->getContext(), "matchesCond", scanFunc, 0);
    62     BasicBlock * matchesLoopBlock = BasicBlock::Create(iBuilder->getContext(), "matchesLoop", scanFunc, 0);
    63     BasicBlock * matchesDoneBlock = BasicBlock::Create(iBuilder->getContext(), "matchesDone", scanFunc, 0);
     59    Constant * matchProcessor = m->getOrInsertFunction("wrapped_report_pos", b->getVoidTy(), T, b->getInt32Ty(), nullptr);
     60    BasicBlock * entryBlock = BasicBlock::Create(b->getContext(), "entry", scanFunc, 0);
     61    BasicBlock * matchesCondBlock = BasicBlock::Create(b->getContext(), "matchesCond", scanFunc, 0);
     62    BasicBlock * matchesLoopBlock = BasicBlock::Create(b->getContext(), "matchesLoop", scanFunc, 0);
     63    BasicBlock * matchesDoneBlock = BasicBlock::Create(b->getContext(), "matchesDone", scanFunc, 0);
    6464
    65     iBuilder->SetInsertPoint(entryBlock);
    66     iBuilder->CreateBr(matchesCondBlock);
     65    b->SetInsertPoint(entryBlock);
     66    b->CreateBr(matchesCondBlock);
    6767
    68     iBuilder->SetInsertPoint(matchesCondBlock);
    69     PHINode * matches_phi = iBuilder->CreatePHI(T, 2, "matches");
     68    b->SetInsertPoint(matchesCondBlock);
     69    PHINode * matches_phi = b->CreatePHI(T, 2, "matches");
    7070    matches_phi->addIncoming(matchWord, entryBlock);
    71     Value * have_matches_cond = iBuilder->CreateICmpUGT(matches_phi, ConstantInt::get(T, 0));
    72     iBuilder->CreateCondBr(have_matches_cond, matchesLoopBlock, matchesDoneBlock);
     71    Value * have_matches_cond = b->CreateICmpUGT(matches_phi, ConstantInt::get(T, 0));
     72    b->CreateCondBr(have_matches_cond, matchesLoopBlock, matchesDoneBlock);
    7373
    74     iBuilder->SetInsertPoint(matchesLoopBlock);
    75     Value * match_pos = iBuilder->CreateAdd(iBuilder->CreateCountForwardZeroes(matches_phi), basePos);
    76     Value * matches_new = iBuilder->CreateAnd(matches_phi, iBuilder->CreateSub(matches_phi, ConstantInt::get(T, 1)));
     74    b->SetInsertPoint(matchesLoopBlock);
     75    Value * match_pos = b->CreateAdd(b->CreateCountForwardZeroes(matches_phi), basePos);
     76    Value * matches_new = b->CreateAnd(matches_phi, b->CreateSub(matches_phi, ConstantInt::get(T, 1)));
    7777    matches_phi->addIncoming(matches_new, matchesLoopBlock);
    78     iBuilder->CreateCall(matchProcessor, std::vector<Value *>({match_pos, dist}));
    79     iBuilder->CreateBr(matchesCondBlock);
     78    b->CreateCall(matchProcessor, std::vector<Value *>({match_pos, dist}));
     79    b->CreateBr(matchesCondBlock);
    8080
    81     iBuilder->SetInsertPoint(matchesDoneBlock);
    82     iBuilder -> CreateRetVoid();
     81    b->SetInsertPoint(matchesDoneBlock);
     82    b -> CreateRetVoid();
    8383
    8484    return scanFunc;
     
    8686}
    8787
    88 editdScanKernel::editdScanKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned dist) :
    89 BlockOrientedKernel("scanMatch",
    90               {Binding{iBuilder->getStreamSetTy(dist + 1), "matchResults"}},
    91               {}, {}, {}, {Binding{iBuilder->getSizeTy(), "BlockNo"}}),
    92 mEditDistance(dist),
    93 mScanwordBitWidth(iBuilder->getSizeTy()->getBitWidth()) {
    94 
     88editdScanKernel::editdScanKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * matchResults) :
     89BlockOrientedKernel("editdScanMatch" + std::to_string(matchResults->getNumElements()),
     90              {Binding{"matchResults", matchResults}},
     91              {}, {}, {}, {Binding{b->getSizeTy(), "BlockNo"}}),
     92mNumElements(matchResults->getNumElements()),
     93mScanwordBitWidth(b->getSizeTy()->getBitWidth()) {
     94    addAttribute(SideEffecting());
    9595}
    9696
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.h

    r5440 r6184  
    1515class editdScanKernel : public BlockOrientedKernel {
    1616public:
    17     editdScanKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned dist);
     17    editdScanKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * matchResults);
    1818       
    1919private:
     
    2121    llvm::Function * generateScanWordRoutine(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const;
    2222       
    23     unsigned mEditDistance;
     23    unsigned mNumElements;
    2424    unsigned mScanwordBitWidth;
    2525};
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r6147 r6184  
    5656#include <llvm/Support/Debug.h>
    5757#include <llvm/Support/Casting.h>
     58#include <kernels/pipeline_builder.h>
    5859#include <sched.h>
    5960
    60 using namespace parabix;
    6161using namespace llvm;
    6262using namespace cc;
     
    7272static cl::opt<unsigned> ByteCClimit("byte-CC-limit", cl::desc("Max number of CCs for byte CC pipeline."), cl::init(DefaultByteCClimit));
    7373
     74const auto ENCODING_BITS = 8;
    7475
    7576namespace grep {
    76    
     77
     78using Alphabets = ICGrepKernel::Alphabets;
     79
    7780extern "C" void signal_dispatcher(intptr_t callback_object_addr, unsigned signal) {
    7881    reinterpret_cast<GrepCallBackObject *>(callback_object_addr)->handle_signal(signal);
     
    136139}
    137140
    138 
    139 
    140141// Grep Engine construction and initialization.
    141142
    142 GrepEngine::GrepEngine() :
     143GrepEngine::GrepEngine(BaseDriver &driver) :
    143144    mSuppressFileMessages(false),
    144145    mBinaryFilesMode(argv::Text),
     
    152153    mMaxCount(0),
    153154    mGrepStdIn(false),
    154     mGrepDriver(make_unique<ParabixDriver>("engine")),
     155    mGrepDriver(driver),
     156    mMainMethod(nullptr),
    155157    mNextFileToGrep(0),
    156158    mNextFileToPrint(0),
     
    160162    mEngineThread(pthread_self()) {}
    161163
    162 QuietModeEngine::QuietModeEngine() : GrepEngine() {
     164QuietModeEngine::QuietModeEngine(BaseDriver &driver) : GrepEngine(driver) {
    163165    mEngineKind = EngineKind::QuietMode;
    164166    mMoveMatchesToEOL = false;
     
    166168}
    167169
    168 MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithMatch, bool useNullSeparators) :
    169     GrepEngine(), mRequiredCount(showFilesWithMatch) {
     170MatchOnlyEngine::MatchOnlyEngine(BaseDriver & driver, bool showFilesWithMatch, bool useNullSeparators) :
     171    GrepEngine(driver), mRequiredCount(showFilesWithMatch) {
    170172    mEngineKind = EngineKind::MatchOnly;
    171173    mFileSuffix = useNullSeparators ? std::string("\0", 1) : "\n";
     
    175177}
    176178
    177 CountOnlyEngine::CountOnlyEngine() : GrepEngine() {
     179CountOnlyEngine::CountOnlyEngine(BaseDriver &driver) : GrepEngine(driver) {
    178180    mEngineKind = EngineKind::CountOnly;
    179181    mFileSuffix = ":";
    180182}
    181183
    182 EmitMatchesEngine::EmitMatchesEngine() : GrepEngine() {
     184EmitMatchesEngine::EmitMatchesEngine(BaseDriver &driver)
     185: GrepEngine(driver) {
    183186    mEngineKind = EngineKind::EmitMatches;
    184187    mFileSuffix = mInitialTab ? "\t:" : ":";
     
    189192    mGrepRecordBreak = b;
    190193}
    191 
    192    
    193 
    194194   
    195195void GrepEngine::initFileResult(std::vector<boost::filesystem::path> & paths) {
     
    229229}
    230230
    231 
    232    
    233231// Code Generation
    234232//
    235233// All engines share a common pipeline to compute a stream of Matches from a given input Bytestream.
    236234
    237 unsigned LLVM_READNONE calculateMaxCountRate(const std::unique_ptr<kernel::KernelBuilder> & b) {
    238     const unsigned packSize = b->getSizeTy()->getBitWidth();
    239     return (packSize * packSize) / b->getBitBlockWidth();
    240 }
    241    
    242 std::pair<StreamSetBuffer *, StreamSetBuffer *> GrepEngine::grepPipeline(StreamSetBuffer * SourceStream, Value * callback_object_addr) {
    243     auto & idb = mGrepDriver->getBuilder();
    244     const unsigned segmentSize = codegen::SegmentSize;
    245     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    246     // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    247     const unsigned baseBufferSize = segmentSize * (mMaxCount > 0 ? (std::max(bufferSegments, calculateMaxCountRate(idb))) : bufferSegments);
    248     const unsigned encodingBits = 8;
    249    
    250    
     235std::pair<StreamSet *, StreamSet *> GrepEngine::grepPipeline(const std::unique_ptr<PipelineBuilder> & P, StreamSet *SourceStream) {
     236
     237    Scalar * const callbackObject = P->getInputScalar("callbackObject");
     238
    251239    //  Regular Expression Processing and Analysis Phase
    252     const auto nREs = mREs.size();
    253     bool hasGCB[nREs];
    254     bool anyGCB = false;
    255 
    256     for(unsigned i = 0; i < nREs; ++i) {
    257         hasGCB[i] = hasGraphemeClusterBoundary(mREs[i]);
    258         anyGCB |= hasGCB[i];
    259     }
    260     StreamSetBuffer * ByteStream = nullptr;
     240
     241    StreamSet * ByteStream = nullptr;
    261242    if (mBinaryFilesMode == argv::Text) {
    262243        ByteStream = SourceStream;
    263244    } else if (mBinaryFilesMode == argv::WithoutMatch) {
    264         ByteStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 8), baseBufferSize);
    265         kernel::Kernel * binaryCheckK = mGrepDriver->addKernelInstance<kernel::AbortOnNull>(idb);
    266         binaryCheckK->setInitialArguments({callback_object_addr});
    267         mGrepDriver->makeKernelCall(binaryCheckK, {SourceStream}, {ByteStream});
    268         mGrepDriver->LinkFunction(*binaryCheckK, "signal_dispatcher", &signal_dispatcher);
     245        ByteStream = P->CreateStreamSet(1, 8);
     246        Kernel * binaryCheckK = P->CreateKernelCall<AbortOnNull>(SourceStream, ByteStream, callbackObject);
     247        mGrepDriver.LinkFunction(binaryCheckK, "signal_dispatcher", signal_dispatcher);
    269248    } else {
    270249        llvm::report_fatal_error("Binary mode not supported.");
    271250    }
    272     StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    273     std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    274    
     251
     252    const auto numOfREs = mREs.size();
     253    bool hasGCB[numOfREs];
     254    bool anyGCB = false;
     255
     256    for(unsigned i = 0; i < numOfREs; ++i) {
     257        hasGCB[i] = hasGraphemeClusterBoundary(mREs[i]);
     258        anyGCB |= hasGCB[i];
     259    }
     260
     261
     262    StreamSet * LineBreakStream = P->CreateStreamSet();
     263    std::vector<StreamSet *> MatchResultsBufs(numOfREs);
     264
    275265    re::RE * prefixRE;
    276266    re::RE * suffixRE;
    277267    // For simple regular expressions with a small number of characters, we
    278268    // can bypass transposition and use the Direct CC compiler.
    279     bool isSimple = (nREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
     269    const auto isSimple = (numOfREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
    280270    if (isSimple) {
    281271        mREs[0] = toUTF8(mREs[0]);
    282272    }
    283     if (isSimple && byteTestsWithinLimit(mREs[0], ByteCClimit)) {
    284         std::vector<std::string> externalStreamNames;
    285         std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
    286         if (MultithreadedSimpleRE && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
    287             auto CCs = re::collectCCs(prefixRE, &cc::Byte);
    288             for (auto cc : CCs) {
    289                 auto ccName = makeName(cc);
    290                 mREs[0] = re::replaceCC(mREs[0], cc, ccName);
    291                 std::string ccNameStr = ccName->getFullName();
    292                 StreamSetBuffer * ccStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    293                 kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    294                 mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
    295                 externalStreamNames.push_back(ccNameStr);
    296                 icgrepInputSets.push_back(ccStream);
    297             }
    298         }
    299         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    300         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0], externalStreamNames);
    301         mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    302         MatchResultsBufs[0] = MatchResults;
    303         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    304         mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
    305     } else if (isSimple && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
    306         std::vector<std::string> externalStreamNames;
    307         std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
    308         if (MultithreadedSimpleRE) {
    309             auto CCs = re::collectCCs(prefixRE, &cc::Byte);
    310             for (auto cc : CCs) {
    311                 auto ccName = makeName(cc);
    312                 mREs[0] = re::replaceCC(mREs[0], cc, ccName);
    313                 std::string ccNameStr = ccName->getFullName();
    314                 StreamSetBuffer * ccStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    315                 kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    316                 mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
    317                 externalStreamNames.push_back(ccNameStr);
    318                 icgrepInputSets.push_back(ccStream);
    319             }
    320         }
    321         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    322         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteBitGrepKernel>(idb, prefixRE, suffixRE, externalStreamNames);
    323         mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    324         MatchResultsBufs[0] = MatchResults;
    325         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    326         mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
    327     } else {
    328        
    329         StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), baseBufferSize);
    330         kernel::Kernel * s2pk = nullptr;
     273
     274    bool requiresComplexTest = true;
     275
     276    if (isSimple) {
     277        const auto isWithinByteTestLimit = byteTestsWithinLimit(mREs[0], ByteCClimit);
     278        const auto hasTriCC = hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE);
     279        if (isWithinByteTestLimit || hasTriCC) {
     280            std::vector<Binding> inputSets;
     281            inputSets.emplace_back("byteData", ByteStream);
     282            if (MultithreadedSimpleRE && hasTriCC) {
     283                auto CCs = re::collectCCs(prefixRE, cc::Byte);
     284                inputSets.reserve(CCs.size());
     285                for (auto cc : CCs) {
     286                    auto ccName = makeName(cc);
     287                    mREs[0] = re::replaceCC(mREs[0], cc, ccName);
     288                    auto ccNameStr = ccName->getFullName();
     289                    StreamSet * const ccStream = P->CreateStreamSet(1, 1);
     290                    P->CreateKernelCall<DirectCharacterClassKernelBuilder>(ccNameStr, std::vector<re::CC *>{cc}, ByteStream, ccStream);
     291                    inputSets.emplace_back(ccNameStr, ccStream);
     292                }
     293            }
     294            StreamSet * const MatchResults = P->CreateStreamSet(1, 1);
     295            MatchResultsBufs[0] = MatchResults;
     296            if (isWithinByteTestLimit) {
     297                P->CreateKernelCall<ByteGrepKernel>(mREs[0], inputSets, MatchResults);
     298            } else {
     299                P->CreateKernelCall<ByteBitGrepKernel>(prefixRE, suffixRE, inputSets, MatchResults);
     300            }
     301            P->CreateKernelCall<DirectCharacterClassKernelBuilder>( "breakCC", std::vector<re::CC *>{mBreakCC}, ByteStream, LineBreakStream);
     302            requiresComplexTest = false;
     303        }
     304    }
     305
     306    if (requiresComplexTest) {
     307
     308        StreamSet * const BasisBits = P->CreateStreamSet(ENCODING_BITS, 1);
    331309        if (PabloTransposition) {
    332             s2pk = mGrepDriver->addKernelInstance<kernel::S2P_PabloKernel>(idb);
    333         }
    334         else {
    335             s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
    336         }
    337         mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    338 
    339         StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    340         StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    341 
    342         StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    343         kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
    344         mGrepDriver->makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
    345        
    346         kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
    347         mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
     310            P->CreateKernelCall<S2P_PabloKernel>(ByteStream, BasisBits);
     311        } else {
     312            P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits);
     313        }
     314
     315        StreamSet * const RequiredStreams = P->CreateStreamSet();
     316        StreamSet * const UnicodeLB = P->CreateStreamSet();
     317        StreamSet * const LineFeedStream = P->CreateStreamSet();
     318
     319        P->CreateKernelCall<LineFeedKernelBuilder>(BasisBits, LineFeedStream);
     320        P->CreateKernelCall<RequiredStreams_UTF8>(BasisBits, LineFeedStream, RequiredStreams, UnicodeLB);
    348321
    349322        if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
    350323            LineBreakStream = LineFeedStream;
    351324        } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
    352             kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
    353             mGrepDriver->makeKernelCall(breakK, {BasisBits}, {LineBreakStream});
     325            LineBreakStream = P->CreateStreamSet();
     326            P->CreateKernelCall<DirectCharacterClassKernelBuilder>( "Null", std::vector<re::CC *>{mBreakCC}, BasisBits, LineBreakStream);
    354327        } else {
    355328            LineBreakStream = UnicodeLB;
    356329        }
    357        
    358         std::map<std::string, StreamSetBuffer *> propertyStream;
     330
     331        std::map<std::string, StreamSet *> propertyStream;
    359332        if (PropertyKernels) {
    360333            for (auto p : mUnicodeProperties) {
    361334                auto name = p->getFullName();
    362                 StreamSetBuffer * s = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    363                 propertyStream.emplace(std::make_pair(name, s));
    364                 kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
    365                 mGrepDriver->makeKernelCall(propertyK, {BasisBits}, {s});
    366             }
    367         }
    368         StreamSetBuffer * GCB_stream = nullptr;
     335                StreamSet * property = P->CreateStreamSet(1, 1);
     336                propertyStream.emplace(name, property);
     337                P->CreateKernelCall<UnicodePropertyKernelBuilder>(p, BasisBits, property);
     338            }
     339        }
     340
     341        StreamSet * GCB_stream = nullptr;
    369342        if (anyGCB) {
    370             GCB_stream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    371             kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
    372             mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
    373         }
    374 
    375         for(unsigned i = 0; i < nREs; ++i) {
    376             std::vector<std::string> externalStreamNames;
    377             std::vector<StreamSetBuffer *> icgrepInputSets = {BasisBits};
     343            GCB_stream = P->CreateStreamSet();
     344            P->CreateKernelCall<GraphemeClusterBreakKernel>(BasisBits, RequiredStreams, GCB_stream);
     345        }
     346
     347        for(unsigned i = 0; i < numOfREs; ++i) {
     348            ICGrepKernel::Externals externals;
    378349            if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
    379                 externalStreamNames.push_back("UTF8_LB");
    380                 icgrepInputSets.push_back(LineBreakStream);
    381                 externalStreamNames.push_back("UTF8_nonfinal");
    382                 icgrepInputSets.push_back(RequiredStreams);
     350                externals.emplace_back("UTF8_LB", LineBreakStream);
     351                externals.emplace_back("UTF8_nonfinal", RequiredStreams);
    383352            }
    384353            std::set<re::Name *> UnicodeProperties;
    385354            if (PropertyKernels) {
    386355                re::gatherUnicodeProperties(mREs[i], UnicodeProperties);
    387                 for (auto p : UnicodeProperties) {
     356                for (const auto & p : UnicodeProperties) {
    388357                    auto name = p->getFullName();
    389                     auto f = propertyStream.find(name);
    390                     if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
    391                     externalStreamNames.push_back(name);
    392                     icgrepInputSets.push_back(f->second);
     358                    const auto f = propertyStream.find(name);
     359                    if (LLVM_UNLIKELY(f == propertyStream.end())) {
     360                        report_fatal_error(name + " not found");
     361                    }
     362                    externals.emplace_back(name, f->second);
    393363                }
    394364            }
    395             if (hasGCB[i]) {
    396                 externalStreamNames.push_back("\\b{g}");
    397                 icgrepInputSets.push_back(GCB_stream);
    398             }
     365            if (hasGCB[i]) { assert (GCB_stream);
     366                externals.emplace_back("\\b{g}", GCB_stream);
     367            }
     368
     369            StreamSet * const MatchResults = P->CreateStreamSet(1, 1);
     370            MatchResultsBufs[i] = MatchResults;
     371
    399372            if (CC_Multiplexing) {
    400                 const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    401                 StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     373                const auto UnicodeSets = re::collectCCs(mREs[i], cc::Unicode, std::set<re::Name *>{re::makeZeroWidth("\\b{g}")});
    402374                if (UnicodeSets.size() <= 1) {
    403                     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    404                     mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    405                     MatchResultsBufs[i] = MatchResults;
     375                    P->CreateKernelCall<ICGrepKernel>(mREs[i], BasisBits, MatchResults, externals);
    406376                } else {
    407                     mpx = make_unique<MultiplexedAlphabet>("mpx", UnicodeSets);
    408                     mREs[i] = transformCCs(mpx.get(), mREs[i]);
    409                     std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    410                     auto numOfCharacterClasses = mpx_basis.size();
    411                     StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    412                     kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
    413                     mGrepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
    414     //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
    415     //                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
    416                     kernel::ICGrepKernel * icgrepK = (kernel::ICGrepKernel*)mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::LittleEndian);
     377                    auto mpx = std::make_shared<MultiplexedAlphabet>("mpx", UnicodeSets);
     378                    mREs[i] = transformCCs(mpx, mREs[i]);
     379                    auto mpx_basis = mpx->getMultiplexedCCs();
     380                    StreamSet * const CharClasses = P->CreateStreamSet(mpx_basis.size());
     381                    P->CreateKernelCall<CharClassesKernel>(std::move(mpx_basis), BasisBits, CharClasses);
     382
     383                    #warning TODO: multiplexed CCs ought to generate unique names. Make the name also dependent on alphabet.
    417384                    // Multiplexing Grep Kernel is not Cachable, since for now it uses the string representation of the RE AST as cache key,
    418385                    // while it is possible that two multiplexed REs with the same name "mpx_1" have different alphabets
    419                     icgrepK->setCachable(false);
    420                     icgrepInputSets.push_back(CharClasses);
    421                     mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    422                     MatchResultsBufs[i] = MatchResults;
     386
     387                    Alphabets alphabets;
     388                    alphabets.emplace_back(mpx, CharClasses);
     389                    P->CreateKernelCall<ICGrepKernel>(mREs[i], BasisBits, MatchResults, externals, alphabets, cc::BitNumbering::LittleEndian, false);
    423390                }
    424391            } else {
    425                 StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    426                 kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    427                 mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    428                 MatchResultsBufs[i] = MatchResults;
    429             }
    430         }
    431     }
    432 
    433     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    434     if (mREs.size() > 1) {
    435         MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    436         kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
    437         mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
    438     }
    439     StreamSetBuffer * Matches = MergedResults;
     392                P->CreateKernelCall<ICGrepKernel>(mREs[i], BasisBits, MatchResults, externals);
     393            }
     394        }
     395
     396    } // end of requiresComplexTest
     397
     398    StreamSet * Matches = MatchResultsBufs[0];
     399    if (MatchResultsBufs.size() > 1) {
     400        StreamSet * const MergedMatches = P->CreateStreamSet();
     401        P->CreateKernelCall<StreamsMerge>(MatchResultsBufs, MergedMatches);
     402        Matches = MergedMatches;
     403    }
    440404    if (mMoveMatchesToEOL) {
    441         StreamSetBuffer * OriginalMatches = Matches;
    442         kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    443         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    444         mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
     405        StreamSet * const MovedMatches = P->CreateStreamSet();
     406        P->CreateKernelCall<MatchedLinesKernel>(Matches, LineBreakStream, MovedMatches);
     407        Matches = MovedMatches;
    445408    }
    446409    if (mInvertMatches) {
    447         kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
    448         StreamSetBuffer * OriginalMatches = Matches;
    449         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    450         mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
     410        StreamSet * const InvertedMatches = P->CreateStreamSet();
     411        P->CreateKernelCall<InvertMatchesKernel>(Matches, LineBreakStream, InvertedMatches);
     412        Matches = InvertedMatches;
    451413    }
    452414    if (mMaxCount > 0) {
    453         kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
    454         untilK->setInitialArguments({idb->getSize(mMaxCount)});
    455         StreamSetBuffer * const AllMatches = Matches;
    456         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    457         mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
    458     }
    459 
    460     return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
    461 }
     415        StreamSet * const TruncatedMatches = P->CreateStreamSet();
     416        Scalar * const maxCount = P->getInputScalar("maxCount");
     417        P->CreateKernelCall<UntilNkernel>(maxCount, Matches, TruncatedMatches);
     418        Matches = TruncatedMatches;
     419    }
     420    return std::pair<StreamSet *, StreamSet *>(LineBreakStream, Matches);
     421}
     422
     423
    462424
    463425// The QuietMode, MatchOnly and CountOnly engines share a common code generation main function,
     
    466428
    467429void GrepEngine::grepCodeGen() {
    468     auto & idb = mGrepDriver->getBuilder();
    469     Module * M = idb->getModule();
    470 
    471     const unsigned encodingBits = 8;
    472 
    473     Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt8Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
    474     mainFunc->setCallingConv(CallingConv::C);
    475     idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    476     auto args = mainFunc->arg_begin();
    477 
    478     Value * const useMMap = &*(args++);
    479     useMMap->setName("useMMap");
    480     Value * const fileDescriptor = &*(args++);
    481     fileDescriptor->setName("fileDescriptor");
    482     Value * call_back_object = &*(args++);
    483     call_back_object->setName("call_back_object");
    484 
    485     StreamSetBuffer * const ByteStream = mGrepDriver->addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
    486     kernel::Kernel * const sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
    487     sourceK->setInitialArguments({useMMap, fileDescriptor});
    488     mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
    489 
    490     StreamSetBuffer * LineBreakStream;
    491     StreamSetBuffer * Matches;
    492     std::tie(LineBreakStream, Matches) = grepPipeline(ByteStream, call_back_object);
    493 
    494     kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance<kernel::PopcountKernel>(idb);
    495     mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
    496     mGrepDriver->generatePipelineIR();
    497     idb->setKernel(matchCountK);
    498     Value * matchedLineCount = idb->getAccumulator("countResult");
    499     matchedLineCount = idb->CreateZExt(matchedLineCount, idb->getInt64Ty());
    500     mGrepDriver->deallocateBuffers();
    501     idb->CreateRet(matchedLineCount);
    502    
    503     mGrepDriver->finalizeObject();
     430    auto & idb = mGrepDriver.getBuilder();
     431
     432    auto P = mGrepDriver.makePipeline(
     433                // inputs
     434                {Binding{idb->getInt8Ty(), "useMMap"},
     435                Binding{idb->getInt32Ty(), "fileDescriptor"},
     436                Binding{idb->getIntAddrTy(), "callbackObject"},
     437                Binding{idb->getSizeTy(), "maxCount"}}
     438                ,// output
     439                {Binding{idb->getInt64Ty(), "countResult"}});
     440
     441    Scalar * const useMMap = P->getInputScalar("useMMap");
     442    Scalar * const fileDescriptor = P->getInputScalar("fileDescriptor");
     443
     444    StreamSet * const ByteStream = P->CreateStreamSet(1, ENCODING_BITS);
     445    P->CreateKernelCall<FDSourceKernel>(useMMap, fileDescriptor, ByteStream);
     446    StreamSet * const Matches = grepPipeline(P, ByteStream).second;
     447    P->CreateKernelCall<PopcountKernel>(Matches, P->getOutputScalar("countResult"));
     448
     449    mMainMethod = P->compile();
    504450}
    505451
     
    520466        }
    521467    }
    522     size_t bytes = line_end - line_start + 1;
     468
     469    const auto bytes = line_end - line_start + 1;
    523470    mResultStr.write(line_start, bytes);
    524471    mLineCount++;
     
    543490
    544491void EmitMatchesEngine::grepCodeGen() {
    545     auto & idb = mGrepDriver->getBuilder();
    546     Module * M = idb->getModule();
    547 
    548     const unsigned encodingBits = 8;
    549 
    550     Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt8Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
    551     mainFunc->setCallingConv(CallingConv::C);
    552     idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    553     auto args = mainFunc->arg_begin();
    554 
    555     Value * const useMMap = &*(args++);
    556     useMMap->setName("useMMap");
    557     Value * const fileDescriptor = &*(args++);
    558     fileDescriptor->setName("fileDescriptor");
    559     Value * match_accumulator = &*(args++);
    560     match_accumulator->setName("match_accumulator");
    561 
    562     StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
    563     kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
    564     sourceK->setInitialArguments({useMMap, fileDescriptor});
    565     mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
    566 
    567     StreamSetBuffer * LineBreakStream;
    568     StreamSetBuffer * Matches;
    569     std::tie(LineBreakStream, Matches) = grepPipeline(ByteStream, match_accumulator);
    570 
    571     kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
    572     scanMatchK->setInitialArguments({match_accumulator});
    573     mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
    574     mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    575     mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
    576 
    577     mGrepDriver->generatePipelineIR();
    578     mGrepDriver->deallocateBuffers();
    579     idb->CreateRet(idb->getInt64(0));
    580     mGrepDriver->finalizeObject();
     492    auto & idb = mGrepDriver.getBuilder();
     493
     494    auto E = mGrepDriver.makePipeline(
     495                // inputs
     496                {Binding{idb->getInt8Ty(), "useMMap"},
     497                Binding{idb->getInt32Ty(), "fileDescriptor"},
     498                Binding{idb->getIntAddrTy(), "callbackObject"},
     499                Binding{idb->getSizeTy(), "maxCount"}}
     500                ,// output
     501                {Binding{idb->getInt64Ty(), "countResult"}});
     502
     503    Scalar * const useMMap = E->getInputScalar("useMMap");
     504    Scalar * const fileDescriptor = E->getInputScalar("fileDescriptor");
     505
     506    StreamSet * const ByteStream = E->CreateStreamSet(1, ENCODING_BITS);
     507    E->CreateKernelCall<FDSourceKernel>(useMMap, fileDescriptor, ByteStream);
     508
     509    StreamSet * LineBreakStream;
     510    StreamSet * Matches;
     511    std::tie(LineBreakStream, Matches) = grepPipeline(E, ByteStream);
     512
     513    Scalar * const callbackObject = E->getInputScalar("callbackObject");
     514    Kernel * const scanMatchK = E->CreateKernelCall<ScanMatchKernel>(Matches, LineBreakStream, ByteStream, callbackObject);
     515    mGrepDriver.LinkFunction(scanMatchK, "accumulate_match_wrapper", accumulate_match_wrapper);
     516    mGrepDriver.LinkFunction(scanMatchK, "finalize_match_wrapper", finalize_match_wrapper);
     517
     518    E->setOutputScalar("countResult", E->CreateConstant(idb->getInt64(0)));
     519
     520    mMainMethod = E->compile();
    581521}
    582522
     
    597537
    598538uint64_t GrepEngine::doGrep(const std::string & fileName, std::ostringstream & strm) {
    599     typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor, intptr_t callback_addr);
     539    typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor, GrepCallBackObject *, size_t maxCount);
    600540    bool useMMap = mPreferMMap && canMMap(fileName);
    601 
    602     auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
     541    auto f = reinterpret_cast<GrepFunctionType>(mMainMethod);
    603542
    604543    int32_t fileDescriptor = openFile(fileName, strm);
    605544    if (fileDescriptor == -1) return 0;
    606545    GrepCallBackObject handler;
    607     uint64_t grepResult = f(useMMap, fileDescriptor, reinterpret_cast<intptr_t>(&handler));
     546    uint64_t grepResult = f(useMMap, fileDescriptor, &handler, mMaxCount);
     547
    608548    close(fileDescriptor);
    609549    if (handler.binaryFileSignalled()) {
     
    642582
    643583uint64_t EmitMatchesEngine::doGrep(const std::string & fileName, std::ostringstream & strm) {
    644     typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor, intptr_t accum_addr);
     584    typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor, EmitMatch *, size_t maxCount);
    645585    bool useMMap = mPreferMMap && canMMap(fileName);
    646     auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
     586    auto f = reinterpret_cast<GrepFunctionType>(mMainMethod);
    647587    int32_t fileDescriptor = openFile(fileName, strm);
    648588    if (fileDescriptor == -1) return 0;
    649589    EmitMatch accum(linePrefix(fileName), mShowLineNumbers, mInitialTab, strm);
    650     f(useMMap, fileDescriptor, reinterpret_cast<intptr_t>(&accum));
     590    f(useMMap, fileDescriptor, &accum, mMaxCount);
    651591    close(fileDescriptor);
    652592    if (accum.binaryFileSignalled()) {
     
    694634
    695635void * DoGrepThreadFunction(void *args) {
     636    assert (args);
    696637    return reinterpret_cast<GrepEngine *>(args)->DoGrepThreadMethod();
    697638}
     
    753694            printIdx = mNextFileToPrint++;
    754695        } else {
    755             mGrepDriver->performIncrementalCacheCleanupStep();
     696            ObjectCacheManager::performIncrementalCacheCleanupStep();
    756697        }
    757698        sched_yield();
     
    762703    } else {
    763704        // Always perform one final cache cleanup step.
    764         mGrepDriver->performIncrementalCacheCleanupStep();
     705        ObjectCacheManager::performIncrementalCacheCleanupStep();
    765706        if (mGrepStdIn) {
    766707            std::ostringstream s;
     
    775716   
    776717   
    777 InternalSearchEngine::InternalSearchEngine() :
     718InternalSearchEngine::InternalSearchEngine(BaseDriver &driver) :
    778719    mGrepRecordBreak(GrepRecordBreakKind::LF),
    779720    mCaseInsensitive(false),
    780     mGrepDriver(make_unique<ParabixDriver>("InternalEngine")) {}
    781    
    782 void InternalSearchEngine::grepCodeGen(re::RE * matchingRE, re::RE * excludedRE, MatchAccumulator * accum) {
    783     auto & idb = mGrepDriver->getBuilder();
    784     Module * M = idb->getModule();
    785    
     721    mGrepDriver(driver),
     722    mMainMethod(nullptr) {}
     723   
     724void InternalSearchEngine::grepCodeGen(re::RE * matchingRE, re::RE * excludedRE) {
     725    auto & idb = mGrepDriver.getBuilder();
    786726    mSaveSegmentPipelineParallel = codegen::SegmentPipelineParallel;
    787727    codegen::SegmentPipelineParallel = false;
    788     const unsigned segmentSize = codegen::BufferSegments * codegen::SegmentSize * codegen::ThreadNum;
    789728   
    790729    re::CC * breakCC = nullptr;
     
    810749        excludedRE = toUTF8(excludedRE);
    811750    }
    812     Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
    813     mainFunc->setCallingConv(CallingConv::C);
    814     auto args = mainFunc->arg_begin();
    815     Value * const buffer = &*(args++);
    816     buffer->setName("buffer");
    817     Value * length = &*(args++);
    818     length->setName("length");
    819    
    820     idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    821     StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 8));
    822     kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::MemorySourceKernel>(idb);
    823     sourceK->setInitialArguments({buffer, length});
    824     mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
    825     StreamSetBuffer * RecordBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    826     std::string RBname = (mGrepRecordBreak == GrepRecordBreakKind::Null) ? "Null" : "LF";
    827 
    828    
    829     StreamSetBuffer * BasisBits = nullptr;
     751
     752    auto E = mGrepDriver.makePipeline({Binding{idb->getInt8PtrTy(), "buffer"},
     753                                       Binding{idb->getSizeTy(), "length"},
     754                                       Binding{idb->getIntAddrTy(), "accumulator"}});
     755
     756    Scalar * const buffer = E->getInputScalar(0);
     757    Scalar * const length = E->getInputScalar(1);
     758    StreamSet * ByteStream = E->CreateStreamSet(1, 8);
     759    E->CreateKernelCall<MemorySourceKernel>(buffer, length, ByteStream);
     760
     761
     762    StreamSet * RecordBreakStream = E->CreateStreamSet();
     763    const auto RBname = (mGrepRecordBreak == GrepRecordBreakKind::Null) ? "Null" : "LF";
     764
     765   
     766    StreamSet * BasisBits = nullptr;
    830767   
    831768    if (matchAllLines && excludeNothing) {
    832         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, RBname, std::vector<re::CC *>{breakCC});
    833         mGrepDriver->makeKernelCall(breakK, {ByteStream}, {RecordBreakStream});
     769        E->CreateKernelCall<DirectCharacterClassKernelBuilder>(RBname, std::vector<re::CC *>{breakCC}, ByteStream, RecordBreakStream);
    834770    } else {
    835         BasisBits = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
    836         kernel::Kernel * s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
    837         mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    838        
    839         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, RBname, std::vector<re::CC *>{breakCC}, 8);
    840         mGrepDriver->makeKernelCall(breakK, {BasisBits}, {RecordBreakStream});
    841     }
    842    
    843     std::vector<std::string> externalStreamNames;
    844     StreamSetBuffer * MatchingRecords = nullptr;
     771        BasisBits = E->CreateStreamSet(8);
     772        E->CreateKernelCall<S2PKernel>(ByteStream, BasisBits);
     773        E->CreateKernelCall<ParabixCharacterClassKernelBuilder>(RBname, std::vector<re::CC *>{breakCC}, BasisBits, RecordBreakStream);
     774    }
     775   
     776    StreamSet * MatchingRecords = nullptr;
    845777    if (matchAllLines) {
    846778        MatchingRecords = RecordBreakStream;
    847779    } else {
    848         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    849         kernel::Kernel * includeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, matchingRE, externalStreamNames);
    850         mGrepDriver->makeKernelCall(includeK, {BasisBits}, {MatchResults});
    851         MatchingRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    852         kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    853         mGrepDriver->makeKernelCall(matchedLinesK, {MatchResults, RecordBreakStream}, {MatchingRecords});
     780        StreamSet * MatchResults = E->CreateStreamSet();
     781        E->CreateKernelCall<ICGrepKernel>(matchingRE, BasisBits, MatchResults);
     782        MatchingRecords = E->CreateStreamSet();
     783        E->CreateKernelCall<MatchedLinesKernel>(MatchResults, RecordBreakStream, MatchingRecords);
    854784    }
    855785    if (!excludeNothing) {
    856         StreamSetBuffer * ExcludedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    857         kernel::Kernel * excludeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, excludedRE, externalStreamNames);
    858         mGrepDriver->makeKernelCall(excludeK, {BasisBits}, {ExcludedResults});
    859         StreamSetBuffer * ExcludedRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    860         kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    861         mGrepDriver->makeKernelCall(matchedLinesK, {ExcludedResults, RecordBreakStream}, {ExcludedRecords});
    862 
    863         kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
     786        StreamSet * ExcludedResults = E->CreateStreamSet();
     787        E->CreateKernelCall<ICGrepKernel>(excludedRE, BasisBits, ExcludedResults);
     788        StreamSet * ExcludedRecords = E->CreateStreamSet();
     789        E->CreateKernelCall<MatchedLinesKernel>(ExcludedResults, RecordBreakStream, ExcludedRecords);
     790
    864791        if (!matchAllLines) {
    865             StreamSetBuffer * nonExcluded = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    866             mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {nonExcluded});
    867             StreamSetBuffer * included = MatchingRecords;
    868             MatchingRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    869             kernel::Kernel * streamsIntersectK = mGrepDriver->addKernelInstance<kernel::StreamsIntersect>(idb, 1, 2);
    870             mGrepDriver->makeKernelCall(streamsIntersectK, {included, nonExcluded}, {MatchingRecords});
    871         }
    872         else {
    873             MatchingRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    874             mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {MatchingRecords});
    875         }
    876     }
    877     kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
    878     scanMatchK->setInitialArguments({ConstantInt::get(idb->getIntAddrTy(), reinterpret_cast<intptr_t>(accum))});
    879     mGrepDriver->makeKernelCall(scanMatchK, {MatchingRecords, RecordBreakStream, ByteStream}, {});
    880     mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    881     mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
    882     mGrepDriver->generatePipelineIR();
    883     mGrepDriver->deallocateBuffers();
    884     idb->CreateRetVoid();
    885     mGrepDriver->finalizeObject();
    886 }
    887 
    888 void InternalSearchEngine::doGrep(const char * search_buffer, size_t bufferLength) {
    889     typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
    890     auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
    891     f(search_buffer, bufferLength);
     792            StreamSet * nonExcluded = E->CreateStreamSet();
     793            E->CreateKernelCall<InvertMatchesKernel>(ExcludedRecords, RecordBreakStream, nonExcluded);
     794            StreamSet * const included = MatchingRecords;
     795            MatchingRecords = E->CreateStreamSet();
     796            E->CreateKernelCall<StreamsIntersect>(std::vector<StreamSet *>{included, nonExcluded}, MatchingRecords);
     797        } else {
     798            MatchingRecords = E->CreateStreamSet();
     799            E->CreateKernelCall<InvertMatchesKernel>(ExcludedRecords, RecordBreakStream, MatchingRecords);
     800        }
     801    }
     802
     803    Kernel * scanMatchK = E->CreateKernelCall<ScanMatchKernel>(MatchingRecords, RecordBreakStream, ByteStream, E->getInputScalar(2));
     804    mGrepDriver.LinkFunction(scanMatchK, "accumulate_match_wrapper", accumulate_match_wrapper);
     805    mGrepDriver.LinkFunction(scanMatchK, "finalize_match_wrapper", finalize_match_wrapper);
     806
     807    mMainMethod = E->compile();
     808}
     809
     810void InternalSearchEngine::doGrep(const char * search_buffer, size_t bufferLength, MatchAccumulator & accum) {
     811    typedef void (*GrepFunctionType)(const char * buffer, const size_t length, MatchAccumulator *);
     812    auto f = reinterpret_cast<GrepFunctionType>(mMainMethod);
     813    f(search_buffer, bufferLength, &accum);
    892814    codegen::SegmentPipelineParallel = mSaveSegmentPipelineParallel;
    893815}
     
    895817GrepEngine::~GrepEngine() { }
    896818
     819InternalSearchEngine::InternalSearchEngine(const std::unique_ptr<grep::GrepEngine> & engine)
     820: InternalSearchEngine(engine->mGrepDriver) {
     821
     822}
     823
    897824InternalSearchEngine::~InternalSearchEngine() { }
    898825
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.h

    r5998 r6184  
    1 
     1
    22/*
    33 *  Copyright (c) 2018 International Characters.
     
    88#define GREP_ENGINE_H
    99#include <grep_interface.h>
    10 #include <kernels/streamset.h>
     10//#include <kernels/streamset.h>
    1111#include <cc/multiplex_CCs.h>
    1212#include <string>
     
    2020namespace re { class RE; }
    2121namespace llvm { namespace cl { class OptionCategory; } }
    22 class Driver;
     22namespace kernel { class PipelineBuilder; }
     23namespace kernel { class StreamSet; }
     24class BaseDriver;
    2325
    2426
     
    2628   
    2729enum class GrepRecordBreakKind {Null, LF, Unicode};
     30
     31class InternalSearchEngine;
    2832
    2933enum GrepSignal : unsigned {BinaryFile};
     
    5357class GrepEngine {
    5458    enum class FileStatus {Pending, GrepComplete, PrintComplete};
     59    friend class InternalSearchEngine;
    5560public:
    5661
    5762    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
    5863
    59     GrepEngine();
     64    GrepEngine(BaseDriver & driver);
    6065
    6166    virtual ~GrepEngine() = 0;
     
    8489
    8590protected:
    86     std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(parabix::StreamSetBuffer * ByteStream, llvm::Value * callback_object_addr);
     91    std::pair<kernel::StreamSet *, kernel::StreamSet *> grepPipeline(const std::unique_ptr<kernel::PipelineBuilder> & P,
     92                                                                     kernel::StreamSet * ByteStream);
    8793
    8894    virtual uint64_t doGrep(const std::string & fileName, std::ostringstream & strm);
     
    105111    int mMaxCount;
    106112    bool mGrepStdIn;
    107     std::unique_ptr<Driver> mGrepDriver;
     113    BaseDriver & mGrepDriver;
     114    void * mMainMethod;
    108115
    109116    std::atomic<unsigned> mNextFileToGrep;
     
    118125    std::set<re::Name *> mUnicodeProperties;
    119126    re::CC * mBreakCC;
    120     std::unique_ptr<cc::MultiplexedAlphabet> mpx;
    121127    std::string mFileSuffix;
    122128    bool mMoveMatchesToEOL;
     
    132138    friend class EmitMatchesEngine;
    133139public:
    134     EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix),
     140    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm)
     141        : mLinePrefix(linePrefix),
    135142        mShowLineNumbers(showLineNumbers),
    136143        mInitialTab(initialTab),
     
    149156};
    150157
    151 class EmitMatchesEngine : public GrepEngine {
    152 public:
    153     EmitMatchesEngine();
     158class EmitMatchesEngine final : public GrepEngine {
     159public:
     160    EmitMatchesEngine(BaseDriver & driver);
    154161    void grepCodeGen() override;
    155162private:
     
    157164};
    158165
    159 class CountOnlyEngine : public GrepEngine {
    160 public:
    161     CountOnlyEngine();
     166class CountOnlyEngine final : public GrepEngine {
     167public:
     168    CountOnlyEngine(BaseDriver & driver);
    162169private:
    163170    void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) override;
    164171};
    165172
    166 class MatchOnlyEngine : public GrepEngine {
    167 public:
    168     MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators);
     173class MatchOnlyEngine final : public GrepEngine {
     174public:
     175    MatchOnlyEngine(BaseDriver & driver, bool showFilesWithoutMatch, bool useNullSeparators);
    169176private:
    170177    void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) override;
     
    172179};
    173180
    174 class QuietModeEngine : public GrepEngine {
    175 public:
    176     QuietModeEngine();
     181class QuietModeEngine final : public GrepEngine {
     182public:
     183    QuietModeEngine(BaseDriver & driver);
    177184};
    178185
     
    181188class InternalSearchEngine {
    182189public:
    183     InternalSearchEngine();
     190    InternalSearchEngine(BaseDriver & driver);
     191
     192    InternalSearchEngine(const std::unique_ptr<grep::GrepEngine> & engine);
     193
    184194    ~InternalSearchEngine();
    185195   
     
    187197    void setCaseInsensitive()  {mCaseInsensitive = true;}
    188198   
    189     void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE, MatchAccumulator * accum);
    190    
    191     void doGrep(const char * search_buffer, size_t bufferLength);
     199    void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE);
     200   
     201    void doGrep(const char * search_buffer, size_t bufferLength, MatchAccumulator & accum);
    192202   
    193203private:
     
    195205    bool mCaseInsensitive;
    196206    bool mSaveSegmentPipelineParallel;
    197 
    198     std::unique_ptr<Driver> mGrepDriver;
     207    BaseDriver & mGrepDriver;
     208    void * mMainMethod;
    199209};
    200210   
  • icGREP/icgrep-devel/icgrep/grep_interface.cpp

    r5999 r6184  
    194194//
    195195static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
    196 #ifndef NDEBUG
    197         throw std::runtime_error(Message);
    198 #else
    199         // Modified from LLVM's internal report_fatal_error logic.
    200         SmallVector<char, 64> Buffer;
    201         raw_svector_ostream OS(Buffer);
    202         OS << "icgrep ERROR: " << Message << "\n";
    203         StringRef MessageStr = OS.str();
    204         ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
    205         (void)written; // If something went wrong, we deliberately just give up.
    206         // Run the interrupt handlers to make sure any special cleanups get done, in
    207         // particular that we remove files registered with RemoveFileOnSignal.
    208         llvm::sys::RunInterruptHandlers();
    209         exit(InternalFailureCode);
    210 #endif
     196    // Modified from LLVM's internal report_fatal_error logic.
     197    #ifndef NDEBUG
     198    throw std::runtime_error(Message);
     199    #else
     200    SmallVector<char, 64> Buffer;
     201    raw_svector_ostream OS(Buffer);
     202    OS << "icgrep ERROR: " << Message << "\n";
     203    const auto MessageStr = OS.str();
     204    ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
     205    (void)written; // If something went wrong, we deliberately just give up.
     206    // Run the interrupt handlers to make sure any special cleanups get done, in
     207    // particular that we remove files registered with RemoveFileOnSignal.
     208    llvm::sys::RunInterruptHandlers();
     209    exit(InternalFailureCode);
     210    #endif
    211211}
    212    
    213212
    214213void InitializeCommandLineInterface(int argc, char *argv[]) {
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5992 r6184  
    3131#include <fcntl.h>
    3232#include <llvm/ADT/STLExtras.h> // for make_unique
     33#include <toolchain/cpudriver.h>
    3334
    3435using namespace llvm;
     
    120121    if (inputFiles.empty()) {
    121122        argv::UseStdIn = true;
    122     }
    123     else if ((allFiles.size() > 1) && !argv::NoFilenameFlag) {
     123    } else if ((allFiles.size() > 1) && !argv::NoFilenameFlag) {
    124124        argv::WithFilenameFlag = true;
    125125    }
    126126
    127     std::unique_ptr<grep::GrepEngine> grepEngine;
    128    
     127    CPUDriver driver("icgrep");
     128
     129    std::unique_ptr<grep::GrepEngine> grep;
     130
    129131    switch (argv::Mode) {
    130132        case argv::NormalMode:
    131             grepEngine = make_unique<grep::EmitMatchesEngine>();
    132             if (argv::MaxCountFlag) grepEngine->setMaxCount(argv::MaxCountFlag);
    133             if (argv::WithFilenameFlag) grepEngine->showFileNames();
    134             if (argv::LineNumberFlag) grepEngine->showLineNumbers();
    135             if (argv::InitialTabFlag) grepEngine->setInitialTab();
     133            grep = make_unique<grep::EmitMatchesEngine>(driver);
     134            if (argv::MaxCountFlag) grep->setMaxCount(argv::MaxCountFlag);
     135            if (argv::WithFilenameFlag) grep->showFileNames();
     136            if (argv::LineNumberFlag) grep->showLineNumbers();
     137            if (argv::InitialTabFlag) grep->setInitialTab();
    136138           break;
    137139        case argv::CountOnly:
    138             grepEngine = make_unique<grep::CountOnlyEngine>();
    139             if (argv::WithFilenameFlag) grepEngine->showFileNames();
    140             if (argv::MaxCountFlag) grepEngine->setMaxCount(argv::MaxCountFlag);
     140            grep = make_unique<grep::CountOnlyEngine>(driver);
     141            if (argv::WithFilenameFlag) grep->showFileNames();
     142            if (argv::MaxCountFlag) grep->setMaxCount(argv::MaxCountFlag);
    141143           break;
    142144        case argv::FilesWithMatch:
    143145        case argv::FilesWithoutMatch:
    144             grepEngine = make_unique<grep::MatchOnlyEngine>(argv::Mode == argv::FilesWithMatch, argv::NullFlag);
     146            grep = make_unique<grep::MatchOnlyEngine>(driver, argv::Mode == argv::FilesWithMatch, argv::NullFlag);
    145147            break;
    146148        case argv::QuietMode:
    147             grepEngine = make_unique<grep::QuietModeEngine>();
     149            grep = make_unique<grep::QuietModeEngine>(driver);
    148150            break;
    149151        default: llvm_unreachable("Invalid grep mode!");
    150152    }
    151     if (argv::IgnoreCaseFlag) grepEngine->setCaseInsensitive();
    152     if (argv::InvertMatchFlag) grepEngine->setInvertMatches();
     153    if (argv::IgnoreCaseFlag) grep->setCaseInsensitive();
     154    if (argv::InvertMatchFlag) grep->setInvertMatches();
    153155    if (argv::UnicodeLinesFlag) {
    154         grepEngine->setRecordBreak(grep::GrepRecordBreakKind::Unicode);
     156        grep->setRecordBreak(grep::GrepRecordBreakKind::Unicode);
    155157    } else if (argv::NullDataFlag) {
    156         grepEngine->setRecordBreak(grep::GrepRecordBreakKind::Null);
     158        grep->setRecordBreak(grep::GrepRecordBreakKind::Null);
    157159    } else {
    158         grepEngine->setRecordBreak(grep::GrepRecordBreakKind::LF);
     160        grep->setRecordBreak(grep::GrepRecordBreakKind::LF);
    159161    }
    160     grepEngine->setStdinLabel(argv::LabelFlag);
    161     if (argv::UseStdIn) grepEngine->setGrepStdIn();
    162     if (argv::NoMessagesFlag) grepEngine->suppressFileMessages();
    163     if (argv::MmapFlag) grepEngine->setPreferMMap();
    164     grepEngine->setBinaryFilesOption(argv::BinaryFilesFlag);
    165     grepEngine->initREs(REs);
    166     grepEngine->grepCodeGen();
    167     grepEngine->initFileResult(allFiles);
    168     bool matchFound = grepEngine->searchAllFiles();
    169    
     162
     163    grep->setStdinLabel(argv::LabelFlag);
     164    if (argv::UseStdIn) grep->setGrepStdIn();
     165    if (argv::NoMessagesFlag) grep->suppressFileMessages();
     166    if (argv::MmapFlag) grep->setPreferMMap();
     167    grep->setBinaryFilesOption(argv::BinaryFilesFlag);
     168    grep->initREs(REs);
     169    grep->grepCodeGen();
     170    grep->initFileResult(allFiles);
     171    const bool matchFound = grep->searchAllFiles();
     172
    170173    return matchFound ? argv::MatchFoundExitCode : argv::MatchNotFoundExitCode;
    171174}
  • icGREP/icgrep-devel/icgrep/idisa_test.cpp

    r6058 r6184  
    1414#include <kernels/kernel_builder.h>
    1515#include <IR_Gen/idisa_target.h>
    16 #include <kernels/interface.h>
    1716#include <kernels/streamset.h>
    1817#include <kernels/source_kernel.h>
     
    2221#include <toolchain/toolchain.h>
    2322#include <toolchain/cpudriver.h>
     23#include <kernels/pipeline_builder.h>
    2424#include <sys/stat.h>
    2525#include <fcntl.h>
     
    2828
    2929using namespace llvm;
     30using namespace kernel;
    3031
    3132static cl::OptionCategory testFlags("Command Flags", "test options");
     
    4243static cl::opt<int> Immediate("i", cl::desc("Immediate value for mvmd_dslli"), cl::init(1));
    4344
    44 class ShiftLimitKernel : public kernel::BlockOrientedKernel {
     45class ShiftLimitKernel : public BlockOrientedKernel {
    4546public:
    46     ShiftLimitKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned limit);
     47    ShiftLimitKernel(const std::unique_ptr<KernelBuilder> & b, unsigned fw, unsigned limit, StreamSet * input, StreamSet * output);
    4748    bool isCachable() const override { return true; }
    4849    bool hasSignature() const override { return false; }
    4950protected:
    50     void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & kb) override;
     51    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & kb) override;
    5152private:
    5253    const unsigned mTestFw;
     
    5455};
    5556
    56 ShiftLimitKernel::ShiftLimitKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned limit)
    57 : kernel::BlockOrientedKernel("shiftLimit" + std::to_string(fw) + "_" + std::to_string(limit),
    58                               {kernel::Binding{b->getStreamSetTy(1, fw), "shiftOperand"}},
    59                               {kernel::Binding{b->getStreamSetTy(1, fw), "limitedShift"}},
     57ShiftLimitKernel::ShiftLimitKernel(const std::unique_ptr<KernelBuilder> & b, unsigned fw, unsigned limit, StreamSet * input, StreamSet * output)
     58: BlockOrientedKernel("shiftLimit" + std::to_string(fw) + "_" + std::to_string(limit),
     59                              {Binding{"shiftOperand", input}},
     60                              {Binding{"limitedShift", output}},
    6061                              {}, {}, {}),
    6162mTestFw(fw), mShiftLimit(limit) {}
    6263
    63 void ShiftLimitKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & kb) {
     64void ShiftLimitKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & kb) {
    6465    Type * fwTy = kb->getIntNTy(mTestFw);
    6566    Constant * const ZeroConst = kb->getSize(0);
     
    7071}
    7172
    72 class IdisaBinaryOpTestKernel : public kernel::MultiBlockKernel {
     73class IdisaBinaryOpTestKernel : public MultiBlockKernel {
    7374public:
    74     IdisaBinaryOpTestKernel(const std::unique_ptr<kernel::KernelBuilder> & b, std::string idisa_op, unsigned fw, unsigned imm=0);
     75    IdisaBinaryOpTestKernel(const std::unique_ptr<KernelBuilder> &, std::string idisa_op, unsigned fw, unsigned imm,
     76                            StreamSet * Operand1, StreamSet * Operand2, StreamSet * result);
    7577    bool isCachable() const override { return true; }
    7678    bool hasSignature() const override { return false; }
    7779protected:
    78     void generateMultiBlockLogic(const std::unique_ptr<kernel::KernelBuilder> & kb, llvm::Value * const numOfStrides) override;
     80    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, llvm::Value * const numOfStrides) override;
    7981private:
    8082    const std::string mIdisaOperation;
     
    8385};
    8486
    85 IdisaBinaryOpTestKernel::IdisaBinaryOpTestKernel(const std::unique_ptr<kernel::KernelBuilder> & b, std::string idisa_op, unsigned fw, unsigned imm)
    86 : kernel::MultiBlockKernel(idisa_op + std::to_string(fw) + "_test",
    87      {kernel::Binding{b->getStreamSetTy(1, 1), "operand1"}, kernel::Binding{b->getStreamSetTy(1, 1), "operand2"}},
    88      {kernel::Binding{b->getStreamSetTy(1, 1), "result"}},
     87IdisaBinaryOpTestKernel::IdisaBinaryOpTestKernel(const std::unique_ptr<KernelBuilder> & /* b */, std::string idisa_op, unsigned fw, unsigned imm,
     88                                                 StreamSet * Operand1, StreamSet * Operand2, StreamSet * result)
     89: MultiBlockKernel(idisa_op + std::to_string(fw) + "_test",
     90     {Binding{"operand1", Operand1}, Binding{"operand2", Operand2}},
     91     {Binding{"result", result}},
    8992     {}, {}, {}),
    90 mIdisaOperation(idisa_op), mTestFw(fw), mImmediateShift(imm) {}
    91 
    92 void IdisaBinaryOpTestKernel::generateMultiBlockLogic(const std::unique_ptr<kernel::KernelBuilder> & kb, llvm::Value * const numOfBlocks) {
     93mIdisaOperation(std::move(idisa_op)), mTestFw(fw), mImmediateShift(imm) {}
     94
     95void IdisaBinaryOpTestKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, llvm::Value * const numOfBlocks) {
    9396    BasicBlock * entry = kb->GetInsertBlock();
    9497    BasicBlock * processBlock = kb->CreateBasicBlock("processBlock");
     
    163166}
    164167
    165 class IdisaBinaryOpCheckKernel : public kernel::BlockOrientedKernel {
     168class IdisaBinaryOpCheckKernel : public BlockOrientedKernel {
    166169public:
    167     IdisaBinaryOpCheckKernel(const std::unique_ptr<kernel::KernelBuilder> & b, std::string idisa_op, unsigned fw, unsigned imm=0);
     170    IdisaBinaryOpCheckKernel(const std::unique_ptr<KernelBuilder> & b, std::string idisa_op, unsigned fw, unsigned imm,
     171                             StreamSet * Operand1, StreamSet * Operand2, StreamSet * result,
     172                             StreamSet * expected, Scalar * failures);
    168173    bool isCachable() const override { return true; }
    169174    bool hasSignature() const override { return false; }
    170175protected:
    171     void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & kb) override;
     176    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & kb) override;
    172177private:
    173178    const std::string mIdisaOperation;
     
    176181};
    177182
    178 IdisaBinaryOpCheckKernel::IdisaBinaryOpCheckKernel(const std::unique_ptr<kernel::KernelBuilder> & b, std::string idisa_op, unsigned fw, unsigned imm)
    179 : kernel::BlockOrientedKernel(idisa_op + std::to_string(fw) + "_check" + std::to_string(QuietMode),
    180                            {kernel::Binding{b->getStreamSetTy(1, 1), "operand1"},
    181                             kernel::Binding{b->getStreamSetTy(1, 1), "operand2"},
    182                             kernel::Binding{b->getStreamSetTy(1, 1), "test_result"}},
    183                            {kernel::Binding{b->getStreamSetTy(1, 1), "expected_result"}},
    184                            {}, {kernel::Binding{b->getSizeTy(), "totalFailures"}}, {}),
     183IdisaBinaryOpCheckKernel::IdisaBinaryOpCheckKernel(const std::unique_ptr<KernelBuilder> & /* b */, std::string idisa_op, unsigned fw, unsigned imm,
     184                                                   StreamSet * Operand1, StreamSet * Operand2, StreamSet * result,
     185                                                   StreamSet * expected, Scalar * failures)
     186: BlockOrientedKernel(idisa_op + std::to_string(fw) + "_check" + std::to_string(QuietMode),
     187                           {Binding{"operand1", Operand1},
     188                            Binding{"operand2", Operand2},
     189                            Binding{"test_result", result}},
     190                           {Binding{"expected_result", expected}},
     191                           {}, {Binding{"totalFailures", failures}}, {}),
    185192mIdisaOperation(idisa_op), mTestFw(fw), mImmediateShift(imm) {}
    186193
    187 void IdisaBinaryOpCheckKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & kb) {
     194void IdisaBinaryOpCheckKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & kb) {
    188195    Type * fwTy = kb->getIntNTy(mTestFw);
    189196    BasicBlock * reportFailure = kb->CreateBasicBlock("reportFailure");
     
    346353typedef size_t (*IDISAtestFunctionType)(int32_t fd1, int32_t fd2);
    347354
    348 using namespace parabix;
    349 
    350 void pipelineGen(ParabixDriver & pxDriver) {
    351 
    352     auto & idb = pxDriver.getBuilder();
    353     Module * m = idb->getModule();
    354     Value * useMMap = idb->CreateZExt(idb->getTrue(), idb->getInt8Ty());
    355     const auto bufferSize = codegen::SegmentSize * codegen::BufferSegments;
    356    
    357     Type * const int32Ty = idb->getInt32Ty();
    358     Type * const sizeTy = idb->getSizeTy();
    359 
    360     FunctionType * const mainType = FunctionType::get(sizeTy, {int32Ty, int32Ty}, false);
    361     Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
    362     main->setCallingConv(CallingConv::C);
    363     Function::arg_iterator args = main->arg_begin();   
    364     Value * const fileDecriptor1 = &*(args++);
    365     fileDecriptor1->setName("operand1FileDecriptor");
    366     Value * const fileDecriptor2 = &*(args++);
    367     fileDecriptor2->setName("operand2FileDecriptor");
    368 
    369     idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    370 
    371     StreamSetBuffer * Operand1HexStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 8));
    372     kernel::Kernel * sourceK1 = pxDriver.addKernelInstance<kernel::FDSourceKernel>(idb);
    373     sourceK1->setInitialArguments({useMMap, fileDecriptor1});
    374     pxDriver.makeKernelCall(sourceK1, {}, {Operand1HexStream});
    375    
    376     StreamSetBuffer * Operand1BitStream = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), bufferSize);
    377     kernel::Kernel * hexbinK = pxDriver.addKernelInstance<kernel::HexToBinary>(idb);
    378     pxDriver.makeKernelCall(hexbinK, {Operand1HexStream}, {Operand1BitStream});
    379    
    380     StreamSetBuffer * Operand2HexStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 8));
    381     kernel::Kernel * sourceK2 = pxDriver.addKernelInstance<kernel::FDSourceKernel>(idb);
    382     sourceK2->setInitialArguments({useMMap, fileDecriptor2});
    383     pxDriver.makeKernelCall(sourceK2, {}, {Operand2HexStream});
    384    
    385     StreamSetBuffer * Operand2BitStream = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), bufferSize);
    386     kernel::Kernel * hexbinK2 = pxDriver.addKernelInstance<kernel::HexToBinary>(idb);
    387     pxDriver.makeKernelCall(hexbinK2, {Operand2HexStream}, {Operand2BitStream});
    388    
     355StreamSet * readHexToBinary(std::unique_ptr<PipelineBuilder> & P, const std::string & fd) {
     356    StreamSet * const hexStream = P->CreateStreamSet(1, 8);
     357    Scalar * const fileDecriptor = P->getInputScalar(fd);
     358    P->CreateKernelCall<MMapSourceKernel>(fileDecriptor, hexStream);
     359    StreamSet * const bitStream = P->CreateStreamSet(1, 1);
     360    P->CreateKernelCall<HexToBinary>(hexStream, bitStream);
     361    return bitStream;
     362}
     363
     364inline StreamSet * applyShiftLimit(std::unique_ptr<PipelineBuilder> & P, StreamSet * const input) {
    389365    if (ShiftLimit > 0) {
    390         StreamSetBuffer * PreLimitBitStream = Operand2BitStream;
    391         Operand2BitStream = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), bufferSize);
    392         kernel::Kernel * limitK = pxDriver.addKernelInstance<ShiftLimitKernel>(idb, TestFieldWidth, ShiftLimit);
    393         pxDriver.makeKernelCall(limitK, {PreLimitBitStream}, {Operand2BitStream});
    394     }
    395 
    396     StreamSetBuffer * ResultBitStream = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), bufferSize);
    397     kernel::Kernel * testK = pxDriver.addKernelInstance<IdisaBinaryOpTestKernel>(idb, TestOperation, TestFieldWidth, Immediate);
    398     pxDriver.makeKernelCall(testK, {Operand1BitStream, Operand2BitStream}, {ResultBitStream});
    399    
    400     StreamSetBuffer * ExpectedResultBitStream = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), bufferSize);
    401     kernel::Kernel * checkK = pxDriver.addKernelInstance<IdisaBinaryOpCheckKernel>(idb, TestOperation, TestFieldWidth, Immediate);
    402     pxDriver.makeKernelCall(checkK, {Operand1BitStream, Operand2BitStream, ResultBitStream}, {ExpectedResultBitStream});
    403    
     366        StreamSet * output = P->CreateStreamSet(1, 1);
     367        P->CreateKernelCall<ShiftLimitKernel>(TestFieldWidth, ShiftLimit, input, output);
     368        return output;
     369    }
     370    return input;
     371}
     372
     373IDISAtestFunctionType pipelineGen(CPUDriver & pxDriver) {
     374
     375    auto & b = pxDriver.getBuilder();
     376
     377    Type * const sizeTy = b->getSizeTy();
     378    Type * const int32Ty = b->getInt32Ty();
     379
     380    Bindings inputs;
     381    inputs.emplace_back(int32Ty, "operand1FileDecriptor");
     382    inputs.emplace_back(int32Ty, "operand2FileDecriptor");
    404383    if (!TestOutputFile.empty()) {
    405         StreamSetBuffer * ResultHexStream = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 8), bufferSize);
    406         kernel::Kernel * binhexK = pxDriver.addKernelInstance<kernel::BinaryToHex>(idb);
    407         pxDriver.makeKernelCall(binhexK, {ResultBitStream}, {ResultHexStream});
    408         kernel::Kernel * outK = pxDriver.addKernelInstance<kernel::FileSink>(idb, 8);
    409         Value * fName = idb->CreatePointerCast(idb->GetString(TestOutputFile.c_str()), idb->getInt8PtrTy());
    410         outK->setInitialArguments({fName});
    411         pxDriver.makeKernelCall(outK, {ResultHexStream}, {});
    412    }
    413    
    414     pxDriver.generatePipelineIR();
    415     idb->setKernel(checkK);
    416     Value * totalFailures = idb->getAccumulator("totalFailures");
    417    
    418     pxDriver.deallocateBuffers();
    419     idb->CreateRet(totalFailures);
    420     pxDriver.finalizeObject();
     384        inputs.emplace_back(b->getInt8PtrTy(), "outputFileName");
     385    }
     386
     387    auto P = pxDriver.makePipeline(std::move(inputs), {Binding{sizeTy, "totalFailures"}});
     388
     389
     390    StreamSet * const Operand1BitStream = readHexToBinary(P, "operand1FileDecriptor");
     391    StreamSet * const Operand2BitStream = applyShiftLimit(P, readHexToBinary(P, "operand2FileDecriptor"));
     392
     393    StreamSet * const ResultBitStream = P->CreateStreamSet(1, 1);
     394
     395    P->CreateKernelCall<IdisaBinaryOpTestKernel>(TestOperation, TestFieldWidth, Immediate
     396                                                 , Operand1BitStream, Operand2BitStream
     397                                                 , ResultBitStream);
     398
     399    StreamSet * ExpectedResultBitStream = P->CreateStreamSet(1, 1);
     400
     401    P->CreateKernelCall<IdisaBinaryOpCheckKernel>(TestOperation, TestFieldWidth, Immediate
     402                                                 , Operand1BitStream, Operand2BitStream, ResultBitStream
     403                                                 , ExpectedResultBitStream, P->getOutputScalar("totalFailures"));
     404
     405    if (!TestOutputFile.empty()) {
     406        StreamSet * ResultHexStream = P->CreateStreamSet(1, 8);
     407        P->CreateKernelCall<BinaryToHex>(ResultBitStream, ResultHexStream);
     408        Scalar * outputFileName = P->getInputScalar("outputFileName");
     409        P->CreateKernelCall<FileSink>(outputFileName, ResultHexStream);
     410    }
     411
     412    return reinterpret_cast<IDISAtestFunctionType>(P->compile());
    421413}
    422414
     
    424416    cl::ParseCommandLineOptions(argc, argv);
    425417    //codegen::SegmentSize = 1;
    426     ParabixDriver pxDriver("idisa_test");
    427     pipelineGen(pxDriver);
     418    CPUDriver pxDriver("idisa_test");
     419    auto idisaTestFunction = pipelineGen(pxDriver);
    428420   
    429     int32_t fd1 = openFile(Operand1TestFile, llvm::outs());
    430     int32_t fd2 = openFile(Operand2TestFile, llvm::outs());
    431    
    432     auto idisaTestFunction = reinterpret_cast<IDISAtestFunctionType>(pxDriver.getMain());
    433     size_t failure_count = idisaTestFunction(fd1, fd2);
     421    const int32_t fd1 = openFile(Operand1TestFile, llvm::outs());
     422    const int32_t fd2 = openFile(Operand2TestFile, llvm::outs());
     423    const size_t failure_count = idisaTestFunction(fd1, fd2);
    434424    if (!QuietMode) {
    435425        if (failure_count == 0) {
  • icGREP/icgrep-devel/icgrep/kernels/UCD_property_kernel.cpp

    r5887 r6184  
    1818
    1919
    20 UnicodePropertyKernelBuilder::UnicodePropertyKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::Name * property_value_name)
     20UnicodePropertyKernelBuilder::UnicodePropertyKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::Name * property_value_name, StreamSet *BasisBits, StreamSet * property)
    2121: PabloKernel(iBuilder,
    22               "UCD:" + property_value_name->getFullName(),
    23               {Binding{iBuilder->getStreamSetTy(8), "basis"}},
    24               {Binding{iBuilder->getStreamSetTy(1, 1), "property_stream", FixedRate()}}),
    25   mName(property_value_name)
    26 {
     22"UCD:" + property_value_name->getFullName(),
     23{Binding{"basis", BasisBits}},
     24{Binding{"property_stream", property}}),
     25  mName(property_value_name) {
     26
    2727}
    2828
  • icGREP/icgrep-devel/icgrep/kernels/UCD_property_kernel.h

    r5887 r6184  
    1414class UnicodePropertyKernelBuilder : public pablo::PabloKernel {
    1515public:
    16     UnicodePropertyKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & kb,
    17                                  re::Name * property_value_name);
     16    UnicodePropertyKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & kb, re::Name * property_value_name, StreamSet * BasisBits, StreamSet * property);
    1817    bool isCachable() const override { return true; }
    1918    bool hasSignature() const override { return false; }
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp

    r5793 r6184  
    289289    auto width = minWidth;
    290290    for (const std::string & name : mNames) {
    291         mStreamSetInputs.emplace_back(builder->getStreamSetTy(0), name);
     291        mInputStreamSets.emplace_back(builder->getStreamSetTy(0), name);
    292292        width = std::max<unsigned>(name.length() + 5, width);
    293293    }
  • icGREP/icgrep-devel/icgrep/kernels/attributes.cpp

    r5782 r6184  
    22
    33#include <llvm/Support/raw_ostream.h>
     4#include <boost/preprocessor/stringize.hpp>
    45
    56namespace kernel {
     7
     8void Attribute::print(llvm::raw_ostream & out) const noexcept {
     9    #define NAME(DEF) \
     10        case KindId::DEF : out << BOOST_PP_STRINGIZE(DEF); break
     11    #define NAME_AMOUNT(DEF) \
     12        case KindId::DEF : out << BOOST_PP_STRINGIZE(DEF) << mAmount; break
     13    switch (getKind()) {
     14        NAME_AMOUNT(LookAhead);
     15        NAME_AMOUNT(LookBehind);
     16        NAME(Principal);
     17        NAME(Deferred);
     18        NAME(ZeroExtend);
     19        NAME(IndependentRegionBegin); NAME(IndependentRegionEnd);
     20        NAME(ConditionalRegionBegin); NAME(ConditionalRegionEnd);
     21        NAME(RequiresPopCountArray); NAME(RequiresNegatedPopCountArray);
     22        NAME_AMOUNT(Add);
     23        NAME_AMOUNT(RoundUpTo);
     24        NAME(ManagedBuffer);
     25        NAME(Misaligned);
     26        NAME_AMOUNT(BlockSize);
     27        NAME(ReverseRegionBegin); NAME(ReverseRegionEnd);
     28        NAME_AMOUNT(SliceOffset);
     29        NAME(Expandable);
     30        NAME(CanTerminateEarly);
     31        NAME(MustExplicitlyTerminate);
     32        NAME(SideEffecting);
     33        NAME(Family);
     34    }
     35    #undef NAME
     36    #undef NAME_AMOUNT
     37}
     38
     39void AttributeSet::print(llvm::raw_ostream & out) const noexcept {
     40    if (hasAttributes()) {
     41        char joiner = '{';
     42        for (const Attribute & a : getAttributes()) {
     43            out << joiner;
     44            a.print(out);
     45            joiner = ',';
     46        }
     47        out << '}';
     48    }
     49}
    650
    751Attribute & AttributeSet::addAttribute(Attribute attribute) {
  • icGREP/icgrep-devel/icgrep/kernels/attributes.h

    r6047 r6184  
    66#include <assert.h>
    77
     8namespace llvm { class raw_ostream; }
     9
    810namespace kernel {
    911
     
    1416        /** INPUT STREAM ATTRIBUTES **/
    1517
    16         LookAhead, /// NOT DONE
     18        LookAhead,
    1719
    1820        // A LookAhead(n) attribute on an input stream set S declares that the kernel
     
    6870        ZeroExtend, /// NOT DONE
    6971
    70         // If the available item count of an input stream it less than some other input
    71         // stream(s), it will be zero-extended to the length of the larger stream. If
    72         // this option is not set and the kernel does not have a MustExplicitlyTerminate
     72        // If the available item count of an input stream is less than some other input
     73        // stream(s), the stream will be zero-extended to the length of the larger stream.
     74        // If this option is not set and the kernel does not have a MustExplicitlyTerminate
    7375        // attribute, it will end once any input has been exhausted.
     76
     77        // NOTE: zero-extended streams are not considered by the pipeline when ascertaining
     78        // whether it is entering the final segment. At least one input stream must not be
     79        // zero-extended and a stream cannot have both Principal and ZeroExtend attributes.
    7480
    7581        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
     
    100106        // uses of the streams and zero out any non-regions from the data.
    101107
    102         AlwaysConsume,
    103 
    104         // Always consume the input (i.e., use the lowerbound to determine whether to there
    105         // is enough data to execute a stride rather than the upper bound.)
     108        RequiresPopCountArray, RequiresNegatedPopCountArray,
     109
     110        // Marks whether a particular input stream requires a popcount or negated popcount
     111        // array for its own internal processing.
    106112
    107113        /** OUTPUT STREAM ATTRIBUTES **/
     
    116122        // A RoundUpTo(k) attribute indicates the final item count of this stream will
    117123        // be rounded up to the nearest multiple of k
     124
     125        ManagedBuffer,
     126
     127        // Generally, kernels do not require knowledge about how many items are consumed
     128        // from their produced streams or who is consuming them and instead rely on the
     129        // pipeline to manage their output buffers for them. The major exception is source
     130        // kernels since they produce data "autonomously" and may manage their memory
     131        // internally. Thus this attribute instructs both the kernel compiler and pipeline
     132        // that a particular output stream needs both the consumed item count and a pointer
     133        // to each of its consumers' logical segment number for its internal logic.
    118134
    119135        /** INPUT/OUTPUT STREAM ATTRIBUTES **/
     
    210226
    211227
    212         RequiresLinearAccess, PermitsNonLinearAccess,
    213 
    214         // Indicates whether all unprocessed / consumed space is safely accessible by the
    215         // MultiBlockKernel code. By default, input streams and any output stream in which
    216         // we know a priori exactly how much data will be written into the overflow buffer
    217         // are opt-out and all others are opt-in. The reason is that writing non-linear
    218         // output at a non-Fixed rate be costly to manage. E.g.,
    219 
    220         //                             BUFFER          v   OVERFLOW
    221         //                |?????############...........###|#####???|
    222         //                                 n           p  k    m
    223 
    224         // Suppose from a given offset p, we write n items but only have space for k items
    225         // in the stream set buffer. Assuming we wrote more than one stride, we know that
    226         // there are (m - k) items in the overflow but may not know what our value of m is
    227         // unless we can derive the relationship between m and n a priori. The problem is
    228         // that the kernel will write the second stride's output at the (m - k)-th position
    229         // of the 0-th block and but final reported count will be n. We can safely mitigate
    230         // this in many ways:
    231 
    232         // (1) when we detect that we could write into the overflow region of the buffer,
    233         // we can zero out the memory of both the overflow *and* the 0-th block of the
    234         // buffer then combine both by OR-ing the streams and writing them to the 0-th
    235         // block. The advantage is we require no extra memory but the disadvantage is that
    236         // the kernel is now relies on the pipeline to ensure that whenever we may write
    237         // into the overflow that the 0-th block is fully consumed.
    238 
    239         // (2) the overflow region is equal to the size of the buffer (i.e., employ double
    240         // buffering.) The advantage of this is the kernel makes no assumptions about the
    241         // pipeline itself. The disadvantage is we could have to copy a lot of data if k
    242         // is very small and the amount we will copy is variable.
    243 
    244         // (3) use stack allocated temporary buffers. This method has similar advantages /
    245         // disadvantages to 2 but trades heap space allocations for stack based ones.
    246 
    247         // (4) force people writing kernels to record the number of items written each
    248         // stride. The advantage of this is it would be as cheap as (1) but requires the
    249         // kernel writer maintain the current stride index and that the kernel logic has
    250         // a natural breakpoint in the algorithm in which to record the number.
     228        SliceOffset, /// NOT DONE
     229
     230        // Given a SliceOffset of k, the k-th stream set will be the base (zeroth) stream set
     231        // for the kernel. Internally, this stores a scalar in the kernel state and loads
     232        // it once at the start of each segment.
    251233
    252234        Expandable, /// NOT DONE
     
    255237
    256238        /** KERNEL ATTRIBUTES **/
    257 
    258         SelectMinimumInputLength, /// NOT DONE
    259 
    260         // If a kernel has multiple input streams and their final item count differs,
    261         // a MultiBlock kernel will select the *minimum* input item count as it's
    262         // principle item length and truncate the streams to fit
    263 
    264         // NOTE: this is the default if a kernel does not have SelectMaximumInputLength
    265         // set and no PrincipalInputStream was declared.
    266 
    267         SelectMaximumInputLength, /// NOT DONE
    268 
    269         // If a kernel has multiple input streams and their final item count differs,
    270         // a MultiBlock kernel will select the *maximum* input item count as it's
    271         // principle item length and zero-extend the streams accordingly.
    272239
    273240        CanTerminateEarly,
     
    282249        // in the pipeline have also terminated.
    283250
    284         MustProcessAll,
    285 
    286         //Workaround, the kernel will finish only when all of the inputs are consumed
     251        SideEffecting,
     252
     253        // Mark this kernel as side-effecting, which will prevent the pipeline compiler
     254        // from wrongly removing it from the pipeline. All sink kernels that produce a
     255        // result through StdOut/StdErr should be marked as SideEffecting.
     256
     257        Family,
     258
     259        // Marks that this kernel belongs to a named family and whether an input scalar
     260        // should be suppressed from the generated "main" function because it will be bound
     261        // within it to the appropriate handle/function pointers.
    287262
    288263    };
     
    331306    friend Attribute BlockSize(const unsigned k);
    332307    friend Attribute Principal();
    333     friend Attribute AlwaysConsume();
    334308    friend Attribute RoundUpTo(const unsigned);
     309    friend Attribute ManagedBuffer();
    335310    friend Attribute LookAhead(const unsigned);
    336311    friend Attribute LookBehind(const unsigned);
     
    341316    friend Attribute CanTerminateEarly();
    342317    friend Attribute MustExplicitlyTerminate();
    343     friend Attribute RequiresLinearAccess();
    344     friend Attribute PermitsNonLinearAccess();
     318    friend Attribute RequiresPopCountArray();
     319    friend Attribute RequiresNegatedPopCountArray();
     320    friend Attribute SideEffecting();
     321    friend Attribute Family();
     322
     323    void print(llvm::raw_ostream & out) const noexcept;
    345324
    346325    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
     
    368347    }
    369348
    370     Attribute & findAttribute(const AttributeId id) const {
     349    Attribute & findAttribute(const AttributeId id) const LLVM_READNONE {
    371350        return *__findAttribute(id);
    372351    }
     
    374353    Attribute & addAttribute(Attribute attribute);
    375354
    376     bool LLVM_READNONE hasAttributes() const {
     355    bool hasAttributes() const LLVM_READNONE {
    377356        return !empty();
    378357    }
    379358
    380     bool LLVM_READNONE hasAttribute(const AttributeId id) const {
     359    bool hasAttribute(const AttributeId id) const LLVM_READNONE {
    381360        return __findAttribute(id) != nullptr;
    382361    }
     
    384363    AttributeSet() = default;
    385364
     365    AttributeSet(const AttributeSet &) = default;
     366
    386367    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
    387368
    388369    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
     370
     371protected:
     372
     373    void print(llvm::raw_ostream & out) const noexcept;
    389374
    390375private:
     
    402387}
    403388
    404 inline Attribute AlwaysConsume() {
    405     return Attribute(Attribute::KindId::AlwaysConsume, 0);
     389inline Attribute ManagedBuffer() {
     390    return Attribute(Attribute::KindId::ManagedBuffer, 0);
    406391}
    407392
     
    431416}
    432417
    433 inline Attribute RequiresLinearAccess() {
    434     return Attribute(Attribute::KindId::RequiresLinearAccess, 0);
    435 }
    436 
    437 inline Attribute PermitsNonLinearAccess() {
    438     return Attribute(Attribute::KindId::PermitsNonLinearAccess, 0);
    439 }
    440 
    441418inline Attribute Misaligned() {
    442419    return Attribute(Attribute::KindId::Misaligned, 0);
     
    459436}
    460437
     438inline Attribute SideEffecting() {
     439    return Attribute(Attribute::KindId::SideEffecting, 0);
     440}
     441
     442inline Attribute Family() {
     443    return Attribute(Attribute::KindId::Family, 0);
     444}
     445
     446inline Attribute RequiresPopCountArray() {
     447    return Attribute(Attribute::KindId::RequiresPopCountArray, 0);
     448}
     449
     450inline Attribute RequiresNegatedPopCountArray() {
     451    return Attribute(Attribute::KindId::RequiresNegatedPopCountArray, 0);
     452}
     453
    461454}
    462455#endif // ATTRIBUTES_H
  • icGREP/icgrep-devel/icgrep/kernels/bitstream_gather_pdep_kernel.cpp

    r6055 r6184  
    121121                        {b->getInt32(0), b->getInt32(32), b->getInt32(64), b->getInt32(96)});
    122122
    123                 Value *nullAddress = this->fill_address(b, 32, 4, b->CreateMul(b->CreateTrunc(swizzleIndex, b->getInt32Ty()),
     123                Value *nullAddress = fill_address(b, 32, 4, b->CreateMul(b->CreateTrunc(swizzleIndex, b->getInt32Ty()),
    124124                                                                        b->getInt32(8)));
    125125
  • icGREP/icgrep-devel/icgrep/kernels/block_kernel.cpp

    r5985 r6184  
    77#include <toolchain/toolchain.h>
    88#include <kernels/streamset.h>
     9#include <kernels/kernel_builder.h>
    910#include <llvm/IR/Constants.h>
    1011#include <llvm/IR/Function.h>
     
    1920#endif
    2021#include <llvm/Transforms/Utils/Local.h>
    21 #include <kernels/streamset.h>
     22#include <llvm/Support/Debug.h>
     23#include <boost/graph/adjacency_list.hpp>
     24#include <util/extended_boost_graph_containers.h>
    2225#include <sstream>
    23 #include <kernels/kernel_builder.h>
    24 #include <boost/math/common_factor.hpp>
    25 #include <llvm/Support/Debug.h>
     26#include <functional>
    2627
    2728using namespace llvm;
    28 using namespace parabix;
    29 using namespace boost::math;
     29using namespace boost;
     30using boost::container::flat_set;
    3031
    3132namespace kernel {
     33
     34using AttrId = Attribute::KindId;
     35using RateId = ProcessingRate::KindId;
     36
     37// TODO: Break the BlockOrientedKernel into two classes, one with an explicit DoFinal block and another that
     38// calls the DoBlock method with optional preamble and postamble hooks. By doing so, we can remove the indirect
     39// branches (or function calls) from the following kernel and simplify the cognitive load for the kernel
     40// programmer. This is less general than the current method but there is no evidence that being able to reenter the
     41// DoBlock method multiple times from the DoFinal block would ever be useful.
     42
     43// Can we eliminate some of the kernel state (e.g., EOF) by having a general preamble that can create a stack-
     44// allocated struct?
    3245
    3346const auto DO_BLOCK_SUFFIX = "_DoBlock";
     
    4659    BasicBlock * const entryBlock = b->GetInsertBlock();
    4760    mStrideLoopBody = b->CreateBasicBlock(getName() + "_strideLoopBody");
     61    BasicBlock * const incrementCountableItems = b->CreateBasicBlock(getName() + "_incrementCountableItems");
    4862    BasicBlock * const stridesDone = b->CreateBasicBlock(getName() + "_stridesDone");
    4963    BasicBlock * const doFinalBlock = b->CreateBasicBlock(getName() + "_doFinalBlock");
    5064    BasicBlock * const segmentDone = b->CreateBasicBlock(getName() + "_segmentDone");
    5165
     66    ConstantInt * const ZERO = b->getSize(0);
     67
    5268    b->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, mStrideLoopBody);
    5369
     
    5571
    5672    b->SetInsertPoint(mStrideLoopBody);
    57 
    5873    if (b->supportsIndirectBr()) {
    5974        Value * const baseTarget = BlockAddress::get(segmentDone);
     
    6176        mStrideLoopTarget->addIncoming(baseTarget, entryBlock);
    6277    }
    63 
    6478    mStrideBlockIndex = b->CreatePHI(b->getSizeTy(), 2);
    65     mStrideBlockIndex->addIncoming(b->getSize(0), entryBlock);
     79    mStrideBlockIndex->addIncoming(ZERO, entryBlock);
    6680
    6781    /// GENERATE DO BLOCK METHOD
     
    7084
    7185    Value * const nextStrideBlockIndex = b->CreateAdd(mStrideBlockIndex, b->getSize(1));
    72 
    73     incrementDerivedItemCounts(b);
    74 
     86    Value * noMore = b->CreateICmpEQ(nextStrideBlockIndex, numOfBlocks);
     87    if (hasAttribute(AttrId::CanTerminateEarly) ||  hasAttribute(AttrId::MustExplicitlyTerminate)) {
     88        noMore = b->CreateOr(noMore, b->getTerminationSignal());
     89    }
     90    b->CreateUnlikelyCondBr(noMore, stridesDone, incrementCountableItems);
     91
     92    b->SetInsertPoint(incrementCountableItems);
     93    incrementCountableItemCounts(b);
    7594    BasicBlock * const bodyEnd = b->GetInsertBlock();
    7695    if (mStrideLoopTarget) {
     
    7897    }
    7998    mStrideBlockIndex->addIncoming(nextStrideBlockIndex, bodyEnd);
    80     Value * const notDone = b->CreateICmpULT(nextStrideBlockIndex, numOfBlocks);
    81     b->CreateCondBr(notDone, mStrideLoopBody, stridesDone);
     99
     100    b->CreateBr(mStrideLoopBody);
    82101
    83102    stridesDone->moveAfter(bodyEnd);
     
    101120
    102121    b->SetInsertPoint(doFinalBlock);
    103 
    104122    writeFinalBlockMethod(b, getRemainingItems(b));
    105 
    106123    b->CreateBr(segmentDone);
    107124
     
    125142
    126143/** ------------------------------------------------------------------------------------------------------------- *
    127  * @brief getRemainingItems
    128  ** ------------------------------------------------------------------------------------------------------------- */
    129 Value * BlockOrientedKernel::incrementDerivedItemCounts(const std::unique_ptr<KernelBuilder> & b) {
    130 
    131     Value * const nextIndex = b->CreateAdd(mStrideBlockIndex, b->getSize(1));
    132 
    133     mTreatUnsafeKernelOperationsAsErrors = false;
     144 * @brief incrementCountableItemCounts
     145 ** ------------------------------------------------------------------------------------------------------------- */
     146void BlockOrientedKernel::incrementCountableItemCounts(const std::unique_ptr<KernelBuilder> & b) {
    134147
    135148    // Update the processed item counts
    136     for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    137         const Binding & input = mStreamSetInputs[i];
    138         if (hasDerivedItemCount(input)) {
     149    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
     150        const Binding & input = mInputStreamSets[i];
     151        if (isCountable(input)) {
    139152            const ProcessingRate & rate = input.getRate();
    140153            Value * offset = nullptr;
    141154            if (rate.isFixed()) {
    142                 offset = b->CreateMul(nextIndex, mInputStrideLength[i]);
     155                offset = b->getSize(ceiling(getUpperBound(input) * getStride()));
    143156            } else { // if (rate.isPopCount() || rate.isNegatedPopCount())
    144                 offset = getPopCountRateItems(b, rate, nextIndex);
     157                offset = getPopCountRateItemCount(b, rate, mStrideBlockIndex);
    145158            }
    146             Value * const processed = b->CreateAdd(mInitialProcessedItemCount[i], offset);
     159            Value * const initial = b->getNonDeferredProcessedItemCount(input);
     160            Value * const processed = b->CreateAdd(initial, offset);
    147161            b->setNonDeferredProcessedItemCount(input, processed);
    148162        }
     
    150164
    151165    // Update the produced item counts
    152     for (unsigned i = 0; i < mStreamSetOutputs.size(); ++i) {
    153         const Binding & output = mStreamSetOutputs[i];
    154         if (hasDerivedItemCount(output)) {
     166    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
     167        const Binding & output = mOutputStreamSets[i];
     168        if (isCountable(output)) {
    155169            const ProcessingRate & rate = output.getRate();
    156170            Value * offset = nullptr;
    157171            if (rate.isFixed()) {
    158                 offset = b->CreateMul(nextIndex, mOutputStrideLength[i]);
     172                offset = b->getSize(ceiling(getUpperBound(output) * getStride()));
    159173            } else { // if (rate.isPopCount() || rate.isNegatedPopCount())
    160                 offset = getPopCountRateItems(b, rate, nextIndex);
     174                offset = getPopCountRateItemCount(b, rate, mStrideBlockIndex);
    161175            }
    162             Value * const produced = b->CreateAdd(mInitialProducedItemCount[i], offset);
     176            Value * const initial = b->getNonDeferredProducedItemCount(output);
     177            Value * const produced = b->CreateAdd(initial, offset);
    163178            b->setNonDeferredProducedItemCount(output, produced);
    164179        }
    165180    }
    166 
    167     mTreatUnsafeKernelOperationsAsErrors = true;
    168 
    169     return nextIndex;
    170 }
    171 
     181}
    172182/** ------------------------------------------------------------------------------------------------------------- *
    173183 * @brief getRemainingItems
     
    175185Value * BlockOrientedKernel::getRemainingItems(const std::unique_ptr<KernelBuilder> & b) {
    176186    Value * remainingItems = nullptr;
    177     const auto count = mStreamSetInputs.size();
     187    const auto count = mInputStreamSets.size();
    178188    if (count == 1) {
    179189        return mAccessibleInputItems[0];
    180190    } else {
    181191        for (unsigned i = 0; i < count; i++) {
    182             if (mStreamSetInputs[i].isPrincipal()) {
     192            if (mInputStreamSets[i].isPrincipal()) {