Changeset 4991


Ignore:
Timestamp:
Mar 25, 2016, 5:42:03 PM (20 months ago)
Author:
nmedfort
Message:

Symbol table work and untested kernel instatiate method for multiple input streams

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
13 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4988 r4991  
    4949set(Boost_USE_MULTITHREADED OFF)
    5050set(Boost_USE_STATIC_RUNTIME ON)
    51 
    52 find_package(Boost 1.46 COMPONENTS system iostreams filesystem REQUIRED)
     51find_package(Boost 1.46 REQUIRED COMPONENTS system filesystem iostreams)
     52
     53include_directories("${Boost_INCLUDE_DIRS}")
     54link_directories(${Boost_LIBRARY_DIR})
     55SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_BOOST")
    5356
    5457SET(PABLO_SRC pablo/pabloAST.cpp pablo/ps_if.cpp pablo/ps_while.cpp pablo/function.cpp pablo/codegenstate.cpp pablo/builder.cpp pablo/symbol_generator.cpp pablo/printer_pablos.cpp pablo/pablo_toolchain.cpp)
    55 SET(PABLO_SRC ${PABLO_SRC} pablo/pablo_compiler.cpp pablo/carry_manager.cpp pablo/carry_data.cpp IDISA/idisa_builder.cpp IDISA/idisa_avx_builder.cpp IDISA/idisa_i64_builder.cpp IDISA/idisa_sse_builder.cpp IDISA/idisa_target.cpp kernels/s2p_kernel.cpp kernels/scanmatchgen.cpp kernels/kernel.cpp kernels/pipeline.cpp)
     58SET(PABLO_SRC ${PABLO_SRC} pablo/pablo_compiler.cpp pablo/carry_manager.cpp pablo/carry_data.cpp IDISA/idisa_builder.cpp IDISA/idisa_avx_builder.cpp IDISA/idisa_i64_builder.cpp IDISA/idisa_sse_builder.cpp IDISA/idisa_target.cpp)
     59SET(PABLO_SRC ${PABLO_SRC} kernels/s2p_kernel.cpp kernels/scanmatchgen.cpp kernels/kernel.cpp kernels/pipeline.cpp)
    5660SET(PABLO_SRC ${PABLO_SRC} pablo/analysis/pabloverifier.cpp)
    5761SET(PABLO_SRC ${PABLO_SRC} pablo/optimizers/pablo_simplifier.cpp pablo/optimizers/codemotionpass.cpp)
     
    106110add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp object_cache.cpp ${PRECOMPILED_FILES})
    107111add_executable(casefold casefold.cpp kernels/p2s_kernel.cpp kernels/stdout_kernel.cpp kernels/casefold_pipeline.cpp)
     112add_executable(symtbl symboltable.cpp kernels/symboltablepipeline.cpp toolchain.cpp grep_engine.cpp object_cache.cpp ${PRECOMPILED_FILES})
    108113
    109114IF(ENABLE_PREGENERATED_UCD_FUNCTIONS)
    110115add_dependencies(icgrep run_generate_predefined_ucd_functions)
    111116ENDIF()
    112 IF(Boost_FOUND)
    113     include_directories("${Boost_INCLUDE_DIRS}")
    114     link_directories(${Boost_LIBRARY_DIR})
    115     SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_BOOST")
    116     target_link_libraries(icgrep ${Boost_LIBRARIES})
    117     target_link_libraries(casefold ${Boost_LIBRARIES})
    118 ENDIF()
     117
    119118IF (PRINT_TIMING_INFORMATION)
    120119    find_package(PAPI REQUIRED)
     
    123122ENDIF()
    124123
     124target_link_libraries(icgrep ${Boost_LIBRARIES})
     125target_link_libraries(symtbl ${Boost_LIBRARIES})
     126target_link_libraries(casefold ${Boost_LIBRARIES})
    125127
    126128target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
     129target_link_libraries (symtbl UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
    127130target_link_libraries (casefold UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
    128131
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.hpp

    r4841 r4991  
    44#include <re/re_cc.h>
    55#include <vector>
    6 #ifdef USE_BOOST
    76#include <boost/container/flat_map.hpp>
    8 #else
    9 #include <unordered_map>
    10 #endif
    117
    128namespace cc {
     
    3430    using codepoint_t = re::codepoint_t;
    3531    using RangeList = std::vector<re::interval_t>;
    36     #ifdef USE_BOOST
    3732    using TargetMap = boost::container::flat_map<const UnicodeSet *, PabloAST *>;
    38     #else
    39     using TargetMap = std::unordered_map<const UnicodeSet *, PabloAST *>;
    40     #endif
    4133    using Target = std::pair<const UnicodeSet *, PabloAST *>;
    4234    using TargetVector = std::vector<Target>;
     
    4739public:
    4840
    49     #ifdef USE_BOOST
    5041    using NameMap = boost::container::flat_map<re::Name *, PabloAST *>;
    51     #else
    52     using NameMap = std::unordered_map<re::Name *, PabloAST *>;
    53     #endif
    5442
    5543    UCDCompiler(cc::CC_Compiler & ccCompiler);
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4986 r4991  
    705705util/slab_allocator.h
    706706util/ispc.cpp
     707IDISA/idisa_target.h
     708IDISA/idisa_target.cpp
     709pablo/pablo_toolchain.cpp
     710pablo/pablo_toolchain.h
     711cc/cc_compiler.cpp
     712cc/cc_compiler.h
     713IDISA/idisa_avx_builder.cpp
     714IDISA/idisa_avx_builder.h
     715IDISA/idisa_builder.cpp
     716IDISA/idisa_builder.h
     717IDISA/idisa_i64_builder.cpp
     718IDISA/idisa_i64_builder.h
     719IDISA/idisa_sse_builder.cpp
     720IDISA/idisa_sse_builder.h
     721IDISA/idisa_target.cpp
     722IDISA/idisa_target.h
     723kernels/deletion.cpp
     724kernels/deletion.h
     725kernels/instance.h
     726kernels/kernel.cpp
     727kernels/kernel.h
     728kernels/pipeline.cpp
     729kernels/pipeline.h
     730kernels/s2p_kernel.cpp
     731kernels/s2p_kernel.h
     732kernels/scanmatchgen.cpp
     733kernels/scanmatchgen.h
     734kernels/streamset.h
     735kernels/symboltablepipeline.cpp
     736kernels/symboltablepipeline.h
     737pablo/analysis/pabloverifier.cpp
     738pablo/analysis/pabloverifier.hpp
     739pablo/optimizers/booleanreassociationpass.cpp
     740pablo/optimizers/booleanreassociationpass.h
     741pablo/optimizers/codemotionpass.cpp
     742pablo/optimizers/codemotionpass.h
     743pablo/optimizers/distributivepass.cpp
     744pablo/optimizers/distributivepass.h
     745pablo/optimizers/graph-facade.hpp
     746pablo/optimizers/pablo_automultiplexing.cpp
     747pablo/optimizers/pablo_automultiplexing.hpp
     748pablo/optimizers/pablo_bddminimization.cpp
     749pablo/optimizers/pablo_bddminimization.h
     750pablo/optimizers/pablo_simplifier.cpp
     751pablo/optimizers/pablo_simplifier.hpp
     752pablo/optimizers/schedulingprepass.cpp
     753pablo/optimizers/schedulingprepass.h
     754pablo/passes/factorizedfg.cpp
     755pablo/passes/factorizedfg.h
     756pablo/passes/flattenassociativedfg.cpp
     757pablo/passes/flattenassociativedfg.h
     758pablo/builder.cpp
     759pablo/builder.hpp
     760pablo/carry_data.cpp
     761pablo/carry_data.h
     762pablo/carry_manager.cpp
     763pablo/carry_manager.h
     764pablo/codegenstate.cpp
     765pablo/codegenstate.h
     766pablo/expression_map.hpp
     767pablo/function.cpp
     768pablo/function.h
     769pablo/pablo_compiler.cpp
     770pablo/pablo_compiler.h
     771pablo/pablo_toolchain.cpp
     772pablo/pablo_toolchain.h
     773pablo/pabloAST.cpp
     774pablo/pabloAST.h
     775pablo/pe_advance.h
     776pablo/pe_and.h
     777pablo/pe_call.h
     778pablo/pe_count.h
     779pablo/pe_integer.h
     780pablo/pe_lookahead.h
     781pablo/pe_matchstar.h
     782pablo/pe_next.h
     783pablo/pe_not.h
     784pablo/pe_ones.h
     785pablo/pe_or.h
     786pablo/pe_scanthru.h
     787pablo/pe_sel.h
     788pablo/pe_setithbit.h
     789pablo/pe_string.h
     790pablo/pe_var.h
     791pablo/pe_xor.h
     792pablo/pe_zeroes.h
     793pablo/printer_pablos.cpp
     794pablo/printer_pablos.h
     795pablo/ps_assign.h
     796pablo/ps_if.cpp
     797pablo/ps_if.h
     798pablo/ps_while.cpp
     799pablo/ps_while.h
     800pablo/symbol_generator.cpp
     801pablo/symbol_generator.h
     802re/parsefailure.cpp
     803re/parsefailure.h
     804re/printer_re.cpp
     805re/printer_re.h
     806re/re_alt.h
     807re/re_analysis.cpp
     808re/re_analysis.h
     809re/re_any.h
     810re/re_assertion.h
     811re/re_cc.cpp
     812re/re_cc.h
     813re/re_compiler.cpp
     814re/re_compiler.h
     815re/re_diff.cpp
     816re/re_diff.h
     817re/re_end.h
     818re/re_grapheme_boundary.hpp
     819re/re_intersect.cpp
     820re/re_intersect.h
     821re/re_memoizer.hpp
     822re/re_name.h
     823re/re_nullable.cpp
     824re/re_nullable.h
     825re/re_parser.cpp
     826re/re_parser.h
     827re/re_re.cpp
     828re/re_re.h
     829re/re_rep.cpp
     830re/re_rep.h
     831re/re_seq.h
     832re/re_simplifier.cpp
     833re/re_simplifier.h
     834re/re_start.h
     835re/re_toolchain.cpp
     836re/re_toolchain.h
     837UCD/Blocks.h
     838UCD/CaseFolding_txt.cpp
     839UCD/CaseFolding_txt.h
     840UCD/DerivedAge.h
     841UCD/DerivedBidiClass.h
     842UCD/DerivedBinaryProperties.h
     843UCD/DerivedCombiningClass.h
     844UCD/DerivedCoreProperties.h
     845UCD/DerivedDecompositionType.h
     846UCD/DerivedGeneralCategory.h
     847UCD/DerivedJoiningGroup.h
     848UCD/DerivedJoiningType.h
     849UCD/DerivedNumericType.h
     850UCD/EastAsianWidth.h
     851UCD/GraphemeBreakProperty.h
     852UCD/HangulSyllableType.h
     853UCD/LineBreak.h
     854UCD/precompiled_properties.cpp
     855UCD/precompiled_properties.h
     856UCD/PropertyAliases.h
     857UCD/PropertyObjects.cpp
     858UCD/PropertyObjects.h
     859UCD/PropertyObjectTable.h
     860UCD/PropertyValueAliases.h
     861UCD/PropList.h
     862UCD/resolve_properties.cpp
     863UCD/resolve_properties.h
     864UCD/ScriptExtensions.h
     865UCD/Scripts.h
     866UCD/SentenceBreakProperty.h
     867UCD/ucd_compiler.cpp
     868UCD/ucd_compiler.hpp
     869UCD/unicode_set.cpp
     870UCD/unicode_set.h
     871UCD/UnicodeNameData.cpp
     872UCD/UnicodeNameData.h
     873UCD/WordBreakProperty.h
     874util/ispc.cpp
     875util/papi_helper.hpp
     876util/slab_allocator.h
     877generate_predefined_ucd_functions.cpp
     878grep_engine.cpp
     879grep_engine.h
     880hrtime.h
     881icgrep.cpp
     882object_cache.cpp
     883object_cache.h
     884symboltable.cpp
     885toolchain.cpp
     886toolchain.h
     887utf8_encoder.cpp
     888utf8_encoder.h
     889utf_encoding.h
     890kernels/casefold_pipeline.h
     891kernels/casefold_pipeline.cpp
  • icGREP/icgrep-devel/icgrep/kernels/casefold_pipeline.cpp

    r4988 r4991  
    141141    PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
    142142    remainingBytes->addIncoming(initialBufferSize, initialBlock);
    143     //iBuilder->CallPrintInt("remainingBytes", iBuilder->CreatePtrToInt(remainingBytes, iBuilder->getInt64Ty()));
    144143
    145144    Constant * const step = ConstantInt::get(int64ty, mBlockSize);
     
    169168   
    170169    iBuilder->SetInsertPoint(finalEmptyBlock);
    171     s2pInstance->clearOutputStream();
     170    s2pInstance->clearOutputStreamSet();
    172171    iBuilder->CreateBr(endBlock);
    173172   
  • icGREP/icgrep-devel/icgrep/kernels/instance.h

    r4986 r4991  
    4949    }
    5050
    51     void clearOutputStream(const unsigned streamOffset = 0) {
    52         mDefinition->clearOutputStream(mMemory, streamOffset);
     51    void clearOutputStreamSet(const unsigned streamOffset = 0) {
     52        mDefinition->clearOutputStreamSet(mMemory, streamOffset);
    5353    }
    5454
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r4988 r4991  
    88#include <IDISA/idisa_builder.h>
    99#include <kernels/instance.h>
     10#include <tuple>
     11#include <boost/functional/hash_fwd.hpp>
     12#include <unordered_map>
    1013
    1114using namespace llvm;
     
    5154unsigned KernelBuilder::addInternalState(llvm::Type * const type, std::string && name) {
    5255    if (LLVM_UNLIKELY(mInternalStateNameMap.count(name) != 0)) {
    53         throw std::runtime_error("Kernel already contains internal state " + name);
     56        throw std::runtime_error("Kernel already contains internal state '" + name + "'");
    5457    }
    5558    const unsigned index = addInternalState(type);
     
    121124Value * KernelBuilder::getInputStream(Value * const instance, const unsigned index, const unsigned streamOffset) {
    122125    assert (instance);
     126    assert (index < mInputStream.size());
    123127    Value * inputStream = iBuilder->CreateLoad(iBuilder->CreateGEP(instance,
    124128        {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET), iBuilder->getInt32(0)}));
     
    129133        offset = iBuilder->CreateAdd(offset, ConstantInt::get(offset->getType(), streamOffset));
    130134    }   
    131     offset = iBuilder->CreateCall(modFunction, offset, "offset");
    132     return iBuilder->CreateGEP(inputStream, { offset, iBuilder->getInt32(index) });
     135    return iBuilder->CreateGEP(inputStream, { iBuilder->CreateCall(modFunction, offset), iBuilder->getInt32(index) });
    133136}
    134137
     
    178181Value * KernelBuilder::getOutputStream(Value * const instance, const unsigned index, const unsigned streamOffset) {
    179182    assert (instance);
    180     Value * offset = getOffset(instance, streamOffset);
     183    Value * const offset = getOffset(instance, streamOffset);
    181184    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET), offset, iBuilder->getInt32(index)};
    182185    return iBuilder->CreateGEP(instance, indices);
     
    208211    FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), {PointerType::get(mKernelStateType, 0)}, false);
    209212    mDoBlock = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", mMod);
    210     mDoBlock->setCallingConv(CallingConv::C);
    211   //  mDoBlock->addAttribute(1, Attribute::NoCapture);
    212  //   mDoBlock->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
    213  //   mDoBlock->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
     213    mDoBlock->setCallingConv(CallingConv::C);   
     214    mDoBlock->setDoesNotCapture(1);
     215    mDoBlock->setDoesNotThrow();
    214216
    215217    Function::arg_iterator args = mDoBlock->arg_begin();
    216     mKernelParam = args++;
    217     mKernelParam->setName("this");
     218    mKernelState = args++;
     219    mKernelState->setName("this");
    218220
    219221    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mDoBlock, 0));
    220 
    221 //    mLocalBlockNo = iBuilder->CreateLoad(getBlockNo());
    222 //    Value * blockNo = iBuilder->CreateLoad(getBlockNo());
    223 //    iBuilder->CallPrintInt(mKernelName + "_BlockNo", blockNo);
    224 //    Value * modFunction = iBuilder->CreateLoad(iBuilder->CreateGEP(mKernelParam, {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET), iBuilder->getInt32(1)}));
    225 //    blockNo = iBuilder->CreateCall(modFunction, blockNo);
    226 //    iBuilder->CallPrintInt(mKernelName + "_Offset", blockNo);
    227 
    228222
    229223    return mDoBlock;
     
    242236    iBuilder->CreateRetVoid();
    243237
     238    eliminateRedundantMemoryOperations(mDoBlock);
     239
    244240    // Generate the zero initializer
    245241    PointerType * modFunctionType = PointerType::get(FunctionType::get(iBuilder->getInt64Ty(), {iBuilder->getInt64Ty()}, false), 0);
     
    248244    mConstructor = Function::Create(constructorType, GlobalValue::ExternalLinkage, mKernelName + "_Constructor", mMod);
    249245    mConstructor->setCallingConv(CallingConv::C);
    250     mConstructor->addAttribute(1, Attribute::NoCapture);
    251     //mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
    252    // mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
    253     //mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
     246    mDoBlock->setDoesNotCapture(1);
     247    mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
     248    mDoBlock->setDoesNotThrow();
     249
    254250    auto args = mConstructor->arg_begin();
    255     mKernelParam = args++;
    256     mKernelParam->setName("this");
     251    mKernelState = args++;
     252    mKernelState->setName("this");
    257253    Value * const inputStream = args++;
    258254    inputStream->setName("inputStream");
     
    272268    }
    273269
    274     Value * const input = iBuilder->CreateGEP(mKernelParam, {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET)});
     270    Value * const input = iBuilder->CreateGEP(mKernelState, {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET)});
    275271    iBuilder->CreateStore(inputStream, iBuilder->CreateGEP(input, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    276272    iBuilder->CreateStore(modFunction, iBuilder->CreateGEP(input, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     
    287283//        mStreamSetFunction->addAttribute(2, Attribute::NoCapture);
    288284//        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
    289 //        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
    290285//        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
    291286//        Value * offset = arg;
     
    308303
    309304/** ------------------------------------------------------------------------------------------------------------- *
     305 * @brief eliminateRedundantMemoryOperations
     306 ** ------------------------------------------------------------------------------------------------------------- */
     307inline void KernelBuilder::eliminateRedundantMemoryOperations(Function * const function) {
     308
     309
     310}
     311
     312/** ------------------------------------------------------------------------------------------------------------- *
    310313 * @brief instantiate
    311314 *
     
    327330Instance * KernelBuilder::instantiate(llvm::Value * const inputStream) {
    328331    AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
    329     Value * ptr = inputStream;
    330     iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(ptr, mInputStreamType), CreateModFunction(0));
     332    iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(inputStream, mInputStreamType), CreateModFunction(0));
     333    return new Instance(this, memory);
     334}
     335
     336/** ------------------------------------------------------------------------------------------------------------- *
     337 * @brief instantiate
     338 *
     339 * Generate a new instance of this kernel and call the default constructor to initialize it
     340 ** ------------------------------------------------------------------------------------------------------------- */
     341Instance * KernelBuilder::instantiate(std::initializer_list<llvm::Value *> inputStreams) {
     342    if (mInputStreamType->getStructNumElements() != inputStreams.size()) {
     343        throw std::runtime_error(mKernelName + ".instantiate expected " + std::to_string(inputStreams.size()) +
     344                                 "elements but was given " + std::to_string(mInputStreamType->getStructNumElements()));
     345    }
     346    AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
     347    AllocaInst * inputStruct = iBuilder->CreateAlloca(mInputStreamType, 0);
     348    unsigned i = 0;
     349    for (Value * inputStream : inputStreams) {
     350        Value * ptr = iBuilder->CreateGEP(inputStruct, { iBuilder->getInt32(0), iBuilder->getInt32(i++)});
     351        iBuilder->CreateStore(inputStream, ptr);
     352    }
     353    iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(inputStruct, mInputStreamType), CreateModFunction(0));
    331354    return new Instance(this, memory);
    332355}
     
    341364
    342365/** ------------------------------------------------------------------------------------------------------------- *
    343  * @brief clearOutputStream
    344  ** ------------------------------------------------------------------------------------------------------------- */
    345 void KernelBuilder::clearOutputStream(Value * const instance, const unsigned streamOffset) {
     366 * @brief clearOutputStreamSet
     367 *
     368 * Zero out the i + streamOffset stream set memory, where i is the current stream set indicated by the BlockNo.
     369 ** ------------------------------------------------------------------------------------------------------------- */
     370void KernelBuilder::clearOutputStreamSet(Value * const instance, const unsigned streamOffset) {
    346371    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET), getOffset(instance, streamOffset)};
    347     Value * ptr = iBuilder->CreateGEP(instance, indices, "ptr");
     372    Value * ptr = iBuilder->CreateGEP(instance, indices);
    348373    unsigned size = 0;
    349374    for (unsigned i = 0; i < mOutputStreamType->getStructNumElements(); ++i) {
     
    408433}
    409434
    410 /** ------------------------------------------------------------------------------------------------------------- *
    411  * @brief setLongestLookaheadAmount
    412  ** ------------------------------------------------------------------------------------------------------------- */
    413 void KernelBuilder::setLongestLookaheadAmount(const unsigned bits) {
    414     const unsigned blockWidth = iBuilder->getBitBlockWidth();
    415     const unsigned lookaheadBlocks = (bits + blockWidth - 1) / blockWidth;
    416     mBufferSize = (lookaheadBlocks + 1);
    417 }
    418 
    419435} // end of namespace kernel
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r4986 r4991  
    5959
    6060    inline llvm::Value * getInputStream(const unsigned index, const unsigned streamOffset = 0) {
    61         return getInputStream(mKernelParam, index, streamOffset);
     61        return getInputStream(mKernelState, index, streamOffset);
    6262    }
    6363
    6464    inline llvm::Value * getInputScalar(const unsigned index) {
    65         return getInputScalar(mKernelParam, index);
     65        return getInputScalar(mKernelState, index);
    6666    }
    6767
    6868    llvm::Value * getInternalState(const std::string & name) {
    69         return getInternalState(mKernelParam, name);
     69        return getInternalState(mKernelState, name);
    7070    }
    7171
    7272    void setInternalState(const std::string & name, llvm::Value * value) {
    73         setInternalState(mKernelParam, name, value);
     73        setInternalState(mKernelState, name, value);
    7474    }
    7575
    7676    llvm::Value * getInternalState(const unsigned index) {
    77         return getInternalState(mKernelParam, index);
     77        return getInternalState(mKernelState, index);
    7878    }
    7979
    8080    void setInternalState(const unsigned index, llvm::Value * value) {
    81         setInternalState(mKernelParam, index, value);
     81        setInternalState(mKernelState, index, value);
    8282    }
    8383
    8484    llvm::Value * getOutputStream(const unsigned index, const unsigned streamOffset = 0) {
    85         return getOutputStream(mKernelParam, index, streamOffset);
     85        return getOutputStream(mKernelState, index, streamOffset);
    8686    }
    8787
     
    9191
    9292    llvm::Value * getOutputScalar(const unsigned index) {
    93         return getOutputScalar(mKernelParam, index);
     93        return getOutputScalar(mKernelState, index);
    9494    }
    9595
     
    9999
    100100    llvm::Value * getBlockNo() {
    101         return getBlockNo(mKernelParam);
     101        return getBlockNo(mKernelState);
    102102    }
    103103
     
    114114    kernel::Instance * instantiate(llvm::Value * const inputStream);
    115115
    116     kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> &&inputStream);
     116    kernel::Instance * instantiate(std::initializer_list<llvm::Value *> inputStreams);
     117
     118    kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> && inputStream);
    117119
    118120    llvm::Type * getKernelStateType() const;
     
    122124    llvm::Function * getDoBlockFunction() const;
    123125
    124     void clearOutputStream(llvm::Value * const instance, const unsigned streamOffset = 0);
    125 
    126     void setLongestLookaheadAmount(const unsigned bits);
     126    void clearOutputStreamSet(llvm::Value * const instance, const unsigned streamOffset = 0);
    127127
    128128protected:
     
    153153
    154154    llvm::Function * CreateModFunction(const unsigned size);
     155
     156    void eliminateRedundantMemoryOperations(llvm::Function * const function);
    155157
    156158private:
     
    169171    llvm::Type *                        mOutputStreamType;
    170172
    171     llvm::Value *                       mInputParam;
    172     llvm::Value *                       mKernelParam;
     173    llvm::Value *                       mKernelState;
    173174    unsigned                            mBlockNoIndex;
    174175
     
    192193
    193194inline llvm::Value * KernelBuilder::getKernelState() const {
    194     return mKernelParam;
     195    return mKernelState;
    195196}
    196197
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r4986 r4991  
    104104    scanMatchInstance->setInternalState("FileName", fileName);
    105105
    106     // iBuilder->CallPrintInt("source", iBuilder->CreatePtrToInt(ptr, iBuilder->getInt64Ty()));
    107 
    108106    Value * initialBufferSize = nullptr;
    109107    BasicBlock * initialBlock = nullptr;
     
    165163
    166164    iBuilder->SetInsertPoint(finalEmptyBlock);
    167     s2pInstance->clearOutputStream();
     165    s2pInstance->clearOutputStreamSet();
    168166    iBuilder->CreateBr(endBlock);
    169167
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r4974 r4991  
    227227    Value * kernelStuctParam = kBuilder->getKernelState();
    228228
    229     Value * scanwordPos = iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState("BlockNo"));
     229    Value * scanwordPos = iBuilder->CreateLoad(kBuilder->getInternalState("BlockNo"));
    230230    scanwordPos = iBuilder->CreateMul(scanwordPos, ConstantInt::get(scanwordPos->getType(), iBuilder->getBitBlockWidth()));
    231231
  • icGREP/icgrep-devel/icgrep/kernels/symboltablepipeline.cpp

    r4986 r4991  
    1515#include <pablo/function.h>
    1616#include <pablo/pablo_compiler.h>
     17#include <pablo/analysis/pabloverifier.hpp>
    1718
    1819#include <re/re_cc.h>
     
    2627#include <pablo/printer_pablos.h>
    2728#include <iostream>
     29
     30#include <llvm/IR/Intrinsics.h>
    2831
    2932using namespace re;
     
    8588PabloFunction * SymbolTableBuilder::generateSortingFunction(const PabloFunction * const leading, const std::vector<unsigned> & endpoints) {
    8689    PabloFunction * const function = PabloFunction::Create("sorting", leading->getNumOfResults(), leading->getNumOfResults() * 2);
    87     PabloBlock * const entry = function->getEntryBlock();
     90    PabloBlock * entry = function->getEntryBlock();
    8891    function->setParameter(0, entry->createVar("S"));
    8992    function->setParameter(1, entry->createVar("E"));
     
    100103        PabloAST * S = entry->createAnd(L, R);
    101104        Assign * Si = entry->createAssign("S_" + std::to_string(i), S);
    102         R = entry->createXor(R, S);
    103105        PabloAST * F = entry->createScanThru(R, E);
    104106        Assign * Ei = entry->createAssign("E_" + std::to_string(i), F);
    105107        function->setResult(i * 2, Si);
    106108        function->setResult(i * 2 + 1, Ei);
     109        R = entry->createXor(R, S);
    107110        ++i;
    108111        lowerbound = endpoint;
     
    114117    function->setResult(i * 2 + 1, Ei);
    115118    mLongestLookahead = lowerbound;
     119
    116120    return function;
     121}
     122
     123/** ------------------------------------------------------------------------------------------------------------- *
     124 * @brief generateCountForwardZeroes
     125 ** ------------------------------------------------------------------------------------------------------------- */
     126inline Value * generateCountForwardZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
     127    Value * cttzFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::cttz, bits->getType());
     128    return iBuilder->CreateCall(cttzFunc, std::vector<Value *>({bits, ConstantInt::get(iBuilder->getInt1Ty(), 0)}));
     129}
     130
     131/** ------------------------------------------------------------------------------------------------------------- *
     132 * @brief generateGather
     133 ** ------------------------------------------------------------------------------------------------------------- */
     134inline Value * SymbolTableBuilder::generateGather(Value * const base, Value * const vindex) {
     135
     136    /*
     137        From Intel:
     138
     139        extern __m256i _mm256_i32gather_epi32(int const * base, __m256i vindex, const int scale)
     140
     141        From Clang avx2intrin.h:
     142
     143        #define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \
     144          (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \
     145                                               (int const *)(m),  \
     146                                               (__v8si)(__m256i)(i), \
     147                                               (__v8si)_mm256_set1_epi32(-1), (s)); })
     148
     149        From llvm IntrinsicsX86.td:
     150
     151        def llvm_ptr_ty        : LLVMPointerType<llvm_i8_ty>;             // i8*
     152
     153        def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
     154           Intrinsic<[llvm_v8i32_ty],
     155           [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
     156           [IntrReadArgMem]>;
     157
     158     */
     159
     160    VectorType * const vecType = VectorType::get(iBuilder->getInt32Ty(), 8);
     161    Function * vgather = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_avx2_gather_d_d_256);
     162    return iBuilder->CreateCall(vgather, {Constant::getAllOnesValue(vecType), base, iBuilder->CreateBitCast(vindex, vecType), Constant::getAllOnesValue(vecType), iBuilder->getInt8(1)});
     163}
     164
     165/** ------------------------------------------------------------------------------------------------------------- *
     166 * @brief generateMaskedGather
     167 ** ------------------------------------------------------------------------------------------------------------- */
     168inline Value * SymbolTableBuilder::generateMaskedGather(Value * const base, Value * const vindex, Value * const mask) {
     169
     170    /*
     171        From Intel:
     172
     173        extern __m256i _mm256_mask_i32gather_epi32(__m256i def_vals, int const * base, __m256i vindex, __m256i vmask, const int scale);
     174
     175        From Clang avx2intrin.h:
     176
     177        #define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
     178           (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
     179                                                (int const *)(m), \
     180                                                (__v8si)(__m256i)(i), \
     181                                                (__v8si)(__m256i)(mask), (s)); })
     182        From llvm IntrinsicsX86.td:
     183
     184        def llvm_ptr_ty        : LLVMPointerType<llvm_i8_ty>;             // i8*
     185
     186        def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
     187           Intrinsic<[llvm_v8i32_ty],
     188           [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
     189           [IntrReadArgMem]>;
     190
     191     */
     192
     193    VectorType * const vecType = VectorType::get(iBuilder->getInt32Ty(), 8);
     194    Function * vgather = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_avx2_gather_d_d_256);
     195    return iBuilder->CreateCall(vgather, {Constant::getNullValue(vecType), base, iBuilder->CreateBitCast(vindex, vecType), iBuilder->CreateBitCast(mask, vecType), iBuilder->getInt8(1)});
     196}
     197
     198/** ------------------------------------------------------------------------------------------------------------- *
     199 * @brief generateResetLowestBit
     200 ** ------------------------------------------------------------------------------------------------------------- */
     201inline Value * generateResetLowestBit(IDISA::IDISA_Builder * iBuilder, Value * bits) {
     202    Value * bits_minus1 = iBuilder->CreateSub(bits, ConstantInt::get(bits->getType(), 1));
     203    return iBuilder->CreateAnd(bits_minus1, bits);
     204}
     205
     206/** ------------------------------------------------------------------------------------------------------------- *
     207 * @brief generateScanMatch
     208 ** ------------------------------------------------------------------------------------------------------------- */
     209void SymbolTableBuilder::generateScannerKernel(KernelBuilder * kBuilder, const unsigned minKeyLength, const unsigned maxKeyLength, const unsigned scanWordBitWidth) {
     210
     211    Type * intScanWordTy = iBuilder->getIntNTy(scanWordBitWidth);
     212    const unsigned fieldCount = iBuilder->getBitBlockWidth() / scanWordBitWidth;
     213    Type * scanWordVectorType =  VectorType::get(intScanWordTy, fieldCount);
     214    const unsigned vectorWidth = iBuilder->getBitBlockWidth() / 32;
     215    Type * gatherVectorType =  VectorType::get(iBuilder->getInt32Ty(), vectorWidth);
     216
     217    const unsigned baseIdx = kBuilder->addInternalState(iBuilder->getInt8PtrTy(), "Base");
     218    const unsigned startIndexIdx = kBuilder->addInternalState(iBuilder->getInt32Ty(), "StartIndex");
     219    const unsigned startArrayIdx = kBuilder->addInternalState(ArrayType::get(iBuilder->getInt32Ty(), iBuilder->getBitBlockWidth() + vectorWidth), "StartArray");
     220    const unsigned endIndexIdx = kBuilder->addInternalState(iBuilder->getInt32Ty(), "EndIndex");
     221    const unsigned endArrayIdx = kBuilder->addInternalState(gatherVectorType, "EndArray");
     222
     223    kBuilder->addInputStream(1, "startStream");
     224    kBuilder->addInputStream(1, "endStream");
     225
     226    Function * function = kBuilder->prepareFunction();
     227
     228    BasicBlock * const entry = iBuilder->GetInsertBlock();
     229
     230    BasicBlock * startOuterCond = BasicBlock::Create(mMod->getContext(), "startOuterCond", function, 0);
     231    BasicBlock * startOuterBody = BasicBlock::Create(mMod->getContext(), "startOuterBody", function, 0);
     232    BasicBlock * startInnerCond = BasicBlock::Create(mMod->getContext(), "startInnerCond", function, 0);
     233    BasicBlock * startInnerBody = BasicBlock::Create(mMod->getContext(), "startInnerBody", function, 0);
     234
     235    BasicBlock * endOuterCond = BasicBlock::Create(mMod->getContext(), "endOuterCond", function, 0);
     236    BasicBlock * endOuterBody = BasicBlock::Create(mMod->getContext(), "endOuterBody", function, 0);
     237    BasicBlock * endInnerCond = BasicBlock::Create(mMod->getContext(), "endInnerCond", function, 0);
     238    BasicBlock * endInnerBody = BasicBlock::Create(mMod->getContext(), "endInnerBody", function, 0);
     239
     240    BasicBlock * gatherInit = BasicBlock::Create(mMod->getContext(), "gatherInit", function, 0);
     241
     242    BasicBlock * gatherFullCond = BasicBlock::Create(mMod->getContext(), "gatherFullCond", function, 0);
     243    BasicBlock * gatherFullBody = BasicBlock::Create(mMod->getContext(), "gatherFullBody", function, 0);
     244
     245//    BasicBlock * gatherPartialCond = BasicBlock::Create(mMod->getContext(), "gatherPartialCond", function, 0);
     246//    BasicBlock * gatherPartialBody = BasicBlock::Create(mMod->getContext(), "gatherPartialBody", function, 0);
     247
     248    BasicBlock * exit = BasicBlock::Create(mMod->getContext(), "exit", function, 0);
     249
     250    //TODO: this won't work on files > 2^32 bytes yet; needs an intermediate flush then a recalculation of the base pointer.
     251    Value * const base = iBuilder->CreateLoad(kBuilder->getInternalState(baseIdx), "base");
     252    Value * blockPos = iBuilder->CreateLoad(kBuilder->getBlockNo());
     253    blockPos = iBuilder->CreateMul(blockPos, iBuilder->getInt64(iBuilder->getBitBlockWidth()));
     254
     255    // if two positions cannot be in the same vector element, we could possibly do some work in parallel here.
     256    Value * startIndex = iBuilder->CreateLoad(kBuilder->getInternalState(startIndexIdx), "startIndex");
     257    Value * startArray = kBuilder->getInternalState(startArrayIdx);
     258    Value * startStream = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(0)), scanWordVectorType, "startStream");
     259
     260    Value * endIndex = iBuilder->CreateLoad(kBuilder->getInternalState(endIndexIdx), "endIndex");
     261    Value * endArray = kBuilder->getInternalState(endArrayIdx);
     262    Value * endStream = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(1)), scanWordVectorType, "endStream");
     263
     264    iBuilder->CreateBr(startOuterCond);
     265    iBuilder->SetInsertPoint(startOuterCond);
     266
     267    PHINode * startIV = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
     268    startIV->addIncoming(iBuilder->getInt64(0), entry);
     269    Value * startOuterTest = iBuilder->CreateICmpNE(startIV, iBuilder->getInt64(fieldCount));
     270    iBuilder->CreateCondBr(startOuterTest, startOuterBody, endOuterCond);
     271
     272    iBuilder->SetInsertPoint(startOuterBody);
     273    Value * startField = iBuilder->CreateExtractElement(startStream, startIV);
     274    startIV->addIncoming(iBuilder->CreateAdd(startIV, iBuilder->getInt64(1)), startInnerCond);
     275    iBuilder->CreateBr(startInnerCond);
     276
     277    iBuilder->SetInsertPoint(startInnerCond);
     278    PHINode * startIndexPhi = iBuilder->CreatePHI(startIndex->getType(), 2);
     279    startIndexPhi->addIncoming(startIndex, startOuterBody);
     280    PHINode * startFieldPhi = iBuilder->CreatePHI(intScanWordTy, 2);
     281    startFieldPhi->addIncoming(startField, startOuterBody);
     282    Value * test = iBuilder->CreateICmpNE(startFieldPhi, ConstantInt::getNullValue(intScanWordTy));
     283    iBuilder->CreateCondBr(test, startInnerBody, startOuterCond);
     284
     285    iBuilder->SetInsertPoint(startInnerBody);
     286    Value * startPos = generateCountForwardZeroes(iBuilder, startFieldPhi);
     287    startFieldPhi->addIncoming(generateResetLowestBit(iBuilder, startFieldPhi), startInnerBody);
     288    startPos = iBuilder->CreateTruncOrBitCast(iBuilder->CreateOr(startPos, blockPos), iBuilder->getInt32Ty());
     289    iBuilder->CreateStore(startPos, iBuilder->CreateGEP(startArray, {iBuilder->getInt32(0), startIndexPhi}));
     290    startIndexPhi->addIncoming(iBuilder->CreateAdd(startIndexPhi, ConstantInt::get(startIndexPhi->getType(), 1)), startInnerBody);
     291    iBuilder->CreateBr(startInnerCond);
     292    // END POINT OUTER COND
     293    iBuilder->SetInsertPoint(endOuterCond);
     294    PHINode * endIV = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
     295    endIV->addIncoming(iBuilder->getInt64(0), startOuterCond);
     296    Value * endOuterTest = iBuilder->CreateICmpNE(endIV, iBuilder->getInt64(fieldCount));
     297    iBuilder->CreateCondBr(endOuterTest, endOuterBody, exit);
     298    // END POINT OUTER BODY
     299    iBuilder->SetInsertPoint(endOuterBody);
     300    Value * endField = iBuilder->CreateExtractElement(endStream, endIV);
     301    endIV->addIncoming(iBuilder->CreateAdd(endIV, iBuilder->getInt64(1)), endInnerCond);
     302    iBuilder->CreateBr(endInnerCond);
     303    // END POINT INNER COND
     304    iBuilder->SetInsertPoint(endInnerCond);
     305    PHINode * endIndexPhi = iBuilder->CreatePHI(endIndex->getType(), 3);
     306    endIndexPhi->addIncoming(endIndex, endOuterBody);
     307    PHINode * endFieldPhi = iBuilder->CreatePHI(intScanWordTy, 3);
     308    endFieldPhi->addIncoming(endField, endOuterBody);
     309    Value * endInnerTest = iBuilder->CreateICmpNE(endFieldPhi, ConstantInt::getNullValue(intScanWordTy));
     310    iBuilder->CreateCondBr(endInnerTest, endInnerBody, endOuterCond);
     311    // END POINT INNER BODY
     312    iBuilder->SetInsertPoint(endInnerBody);
     313    Value * endPos = generateCountForwardZeroes(iBuilder, endFieldPhi);
     314    Value * updatedEndFieldPhi = generateResetLowestBit(iBuilder, endFieldPhi);
     315    endFieldPhi->addIncoming(updatedEndFieldPhi, endInnerBody);
     316    endPos = iBuilder->CreateTruncOrBitCast(iBuilder->CreateOr(endPos, blockPos), iBuilder->getInt32Ty());
     317    iBuilder->CreateStore(endPos, iBuilder->CreateGEP(endArray, {iBuilder->getInt32(0), endIndexPhi}));
     318    Value * updatedEndIndexPhi = iBuilder->CreateAdd(endIndexPhi, ConstantInt::get(endIndexPhi->getType(), 1));
     319    endIndexPhi->addIncoming(updatedEndIndexPhi, endInnerBody);
     320    Value * filledEndPosBufferTest = iBuilder->CreateICmpEQ(updatedEndIndexPhi, ConstantInt::get(updatedEndIndexPhi->getType(), vectorWidth));
     321    iBuilder->CreateCondBr(filledEndPosBufferTest, gatherInit, endInnerCond);
     322    // GATHER INIT
     323    iBuilder->SetInsertPoint(gatherInit);
     324    Value * rawTokenBuffer = iBuilder->CreateAlloca(ArrayType::get(gatherVectorType, (maxKeyLength / 4) + (maxKeyLength % 4) != 0 ? 1 : 0));
     325    rawTokenBuffer = iBuilder->CreatePointerCast(rawTokenBuffer, PointerType::get(gatherVectorType, 0));
     326    Value * const startPositions = iBuilder->CreateAlignedLoad(iBuilder->CreatePointerCast(startArray, PointerType::get(gatherVectorType, 0)), 4);
     327    iBuilder->CreateBr(gatherFullCond);
     328    // GATHER FULL COND
     329    iBuilder->SetInsertPoint(gatherFullCond);
     330
     331    endIndexPhi->addIncoming(iBuilder->getInt32(0), gatherFullCond);
     332    endFieldPhi->addIncoming(updatedEndFieldPhi, gatherFullCond);
     333
     334    PHINode * fullGatherIV = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
     335    fullGatherIV->addIncoming(iBuilder->getInt64(0), gatherInit);
     336    PHINode * startPositionsPhi = iBuilder->CreatePHI(startPositions->getType(), 2);
     337    startPositionsPhi->addIncoming(startPositions, gatherInit);
     338
     339    Value * fullGatherTest = iBuilder->CreateICmpNE(fullGatherIV, iBuilder->getInt64(minKeyLength / vectorWidth));
     340    iBuilder->CreateCondBr(fullGatherTest, gatherFullBody, endInnerCond);
     341    // GATHER FULL BODY
     342    iBuilder->SetInsertPoint(gatherFullBody);
     343    Value * gathered = generateGather(base, startPositionsPhi);
     344    startPositionsPhi->addIncoming(iBuilder->CreateAdd(startPositionsPhi, iBuilder->CreateVectorSplat(vectorWidth, iBuilder->getInt32(4))), gatherFullBody);
     345    iBuilder->CreateAlignedStore(gathered, iBuilder->CreateGEP(rawTokenBuffer, fullGatherIV), 4);
     346    fullGatherIV->addIncoming(iBuilder->CreateAdd(fullGatherIV, iBuilder->getInt64(1)), gatherFullBody);
     347    iBuilder->CreateBr(gatherFullCond);
     348
     349    iBuilder->SetInsertPoint(exit);
     350    // need to save the start/end index still
     351    kBuilder->finalize();
    117352}
    118353
     
    133368    PabloFunction * const sorting = generateSortingFunction(leading, endpoints);
    134369
    135     mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder);
    136     mLeadingKernel = new KernelBuilder("leading", mMod, iBuilder);
    137     mSortingKernel = new KernelBuilder("sorting", mMod, iBuilder);
    138 
    139     mLeadingKernel->setLongestLookaheadAmount(mLongestLookahead);
    140     mSortingKernel->setLongestLookaheadAmount(mLongestLookahead);
     370    const auto bufferSize = ((mLongestLookahead + iBuilder->getBitBlockWidth() - 1) / iBuilder->getBitBlockWidth()) + 1;
     371
     372    mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder, 1);
     373    mLeadingKernel = new KernelBuilder("leading", mMod, iBuilder, bufferSize);
     374    mSortingKernel = new KernelBuilder("sorting", mMod, iBuilder, bufferSize);
     375    mScannerKernel = new KernelBuilder("scanner", mMod, iBuilder, 1);
    141376
    142377    generateS2PKernel(mMod, iBuilder, mS2PKernel);
     
    151386
    152387    releaseSlabAllocatorMemory();
     388
     389    generateScannerKernel(mScannerKernel, 1, 1, 64);
     390
    153391}
    154392
     
    163401
    164402    Value * const inputStream = args++;
    165     inputStream->setName("input");
     403    inputStream->setName("inputStream");
    166404
    167405    Value * const bufferSize = args++;
    168     bufferSize->setName("buffersize");
     406    bufferSize->setName("bufferSize");
    169407
    170408    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
     
    173411
    174412    BasicBlock * leadingTestBlock = BasicBlock::Create(mMod->getContext(), "leadingCond", main, 0);
     413    BasicBlock * safetyCheckBlock = BasicBlock::Create(mMod->getContext(), "safetyCheck", main, 0);
    175414    BasicBlock * leadingBodyBlock = BasicBlock::Create(mMod->getContext(), "leadingBody", main, 0);
    176415
     
    211450    remainingBytes->addIncoming(bufferSize, entryBlock);
    212451    Value * leadingBlocksCond = iBuilder->CreateICmpULT(blockNo, iBuilder->getInt64(leadingBlocks));
    213     iBuilder->CreateCondBr(leadingBlocksCond, leadingBodyBlock, regularTestBlock);
     452    iBuilder->CreateCondBr(leadingBlocksCond, safetyCheckBlock, regularTestBlock);
     453
     454    iBuilder->SetInsertPoint(safetyCheckBlock);
     455    Value * safetyCheckCond = iBuilder->CreateICmpULT(remainingBytes, blockSize);
     456    iBuilder->CreateCondBr(safetyCheckCond, regularExitBlock, leadingBodyBlock);
     457
    214458    iBuilder->SetInsertPoint(leadingBodyBlock);
    215459    s2pInstance->CreateDoBlockCall();
     
    221465    // Now all the data for which we can produce and consume a full leading block...
    222466    iBuilder->SetInsertPoint(regularTestBlock);
    223     PHINode * blockNo2 = iBuilder->CreatePHI(intType, 2);
    224     blockNo2->addIncoming(blockNo, leadingTestBlock);
    225467    PHINode * remainingBytes2 = iBuilder->CreatePHI(intType, 2);
    226468    remainingBytes2->addIncoming(remainingBytes, leadingTestBlock);
    227     Value * remainingBytesCond = iBuilder->CreateICmpUGE(remainingBytes2, requiredBytes);
    228     iBuilder->CreateCondBr(remainingBytesCond, regularBodyBlock, regularExitBlock);
     469    Value * remainingBytesCond = iBuilder->CreateICmpULT(remainingBytes2, requiredBytes);
     470    iBuilder->CreateCondBr(remainingBytesCond, regularExitBlock, regularBodyBlock);
    229471    iBuilder->SetInsertPoint(regularBodyBlock);
    230472    s2pInstance->CreateDoBlockCall();
    231473    leadingInstance->CreateDoBlockCall();
    232474    sortingInstance->CreateDoBlockCall();
    233     blockNo2->addIncoming(iBuilder->CreateAdd(blockNo2, iBuilder->getInt64(1)), regularBodyBlock);
    234475    remainingBytes2->addIncoming(iBuilder->CreateSub(remainingBytes2, blockSize), regularBodyBlock);
    235476    iBuilder->CreateBr(regularTestBlock);
    236477
    237 
    238478    // Check if we have a partial blocks worth of leading data remaining
    239479    iBuilder->SetInsertPoint(regularExitBlock);
    240     Value * partialBlockCond = iBuilder->CreateICmpUGT(remainingBytes2, ConstantInt::getNullValue(intType));
    241     iBuilder->CreateCondBr(partialBlockCond, partialBlock, finalTestBlock);
     480    PHINode * remainingBytes3 = iBuilder->CreatePHI(intType, 2);
     481    remainingBytes3->addIncoming(remainingBytes, safetyCheckBlock);
     482    remainingBytes3->addIncoming(remainingBytes2, regularTestBlock);
     483    Value * partialBlockCond = iBuilder->CreateICmpNE(remainingBytes3, ConstantInt::getNullValue(intType));
     484    iBuilder->CreateCondBr(partialBlockCond, finalTestBlock, partialBlock);
    242485
    243486    // If we do, process it and mask out the data
    244487    iBuilder->SetInsertPoint(partialBlock);
    245488    s2pInstance->CreateDoBlockCall();
    246     Value * partialLeadingData[2];
    247     for (unsigned i = 0; i < 2; ++i) {
    248         partialLeadingData[i] = leadingInstance->getOutputStream(i);
    249     }
    250489    leadingInstance->CreateDoBlockCall();
    251     Type * fullBitBlockType = iBuilder->getIntNTy(mBlockSize);
    252     Value * remaining = iBuilder->CreateZExt(iBuilder->CreateSub(blockSize, remainingBytes2), fullBitBlockType);
    253     Value * eofMask = iBuilder->CreateLShr(ConstantInt::getAllOnesValue(fullBitBlockType), remaining);
    254     eofMask = iBuilder->CreateBitCast(eofMask, mBitBlockType);
    255     for (unsigned i = 0; i < 2; ++i) {
    256         Value * value = iBuilder->CreateAnd(iBuilder->CreateBlockAlignedLoad(partialLeadingData[i]), eofMask);
    257         iBuilder->CreateBlockAlignedStore(value, partialLeadingData[i]);
    258     }
    259     for (unsigned i = 0; i < 2; ++i) {
    260         iBuilder->CreateBlockAlignedStore(ConstantInt::getNullValue(mBitBlockType), leadingInstance->getOutputStream(i));
    261     }
     490    leadingInstance->clearOutputStreamSet();
    262491    sortingInstance->CreateDoBlockCall();
    263492    iBuilder->CreateBr(finalTestBlock);
     
    272501
    273502    iBuilder->SetInsertPoint(finalBodyBlock);
    274     for (unsigned i = 0; i < 2; ++i) {
    275         iBuilder->CreateBlockAlignedStore(ConstantInt::getNullValue(mBitBlockType), leadingInstance->getOutputStream(i));
    276     }
    277     Value * blockNoPtr = leadingInstance->getBlockNo();
    278     Value * blockNoValue = iBuilder->CreateLoad(blockNoPtr);
    279     blockNoValue = iBuilder->CreateAdd(blockNoValue, ConstantInt::get(blockNoValue->getType(), 1));
    280     iBuilder->CreateStore(blockNoValue, blockNoPtr);
    281 
     503    leadingInstance->clearOutputStreamSet();
    282504    sortingInstance->CreateDoBlockCall();
    283 
    284505    remainingFullBlocks->addIncoming(iBuilder->CreateSub(remainingFullBlocks, iBuilder->getInt64(1)), finalBodyBlock);
    285506
    286507    iBuilder->CreateBr(finalTestBlock);
    287 
    288508    iBuilder->SetInsertPoint(exitBlock);
    289509    iBuilder->CreateRetVoid();
    290 
    291     main->dump();
    292510
    293511    return main;
     
    298516    delete mLeadingKernel;
    299517    delete mSortingKernel;
    300 }
    301 
    302 }
     518    delete mScannerKernel;
     519}
     520
     521
     522}
  • icGREP/icgrep-devel/icgrep/kernels/symboltablepipeline.h

    r4974 r4991  
    3232    pablo::PabloFunction * generateSortingFunction(const pablo::PabloFunction * const leading, const std::vector<unsigned> & endpoints);
    3333
     34    void generateScannerKernel(KernelBuilder * kBuilder, const unsigned minKeyLength, const unsigned maxKeyLength, const unsigned scanWordBitWidth = 64);
     35    Function * generateScanWordRoutine(KernelBuilder * const kBuilder, const unsigned scanWordBitWidth);
     36
     37    Value * generateGather(Value * const base, Value * const vindex);
     38    Value * generateMaskedGather(Value * const base, Value * const vindex, Value * const mask);
     39
    3440    void generateLLVMParser();
    3541
     
    4046    KernelBuilder *                     mLeadingKernel;
    4147    KernelBuilder *                     mSortingKernel;
     48    KernelBuilder *                     mScannerKernel;
     49
    4250    unsigned                            mLongestLookahead;
    4351    llvm::Type *                        mBitBlockType;
  • icGREP/icgrep-devel/icgrep/pablo/analysis/pabloverifier.cpp

    r4959 r4991  
    44#include <pablo/printer_pablos.h>
    55#include <iostream>
    6 #ifdef USE_BOOST
    76#include <boost/container/flat_set.hpp>
    8 #else
    9 #include <unordered_set>
    10 #endif
    117#include <queue>
    128
     
    1410namespace pablo {
    1511
    16 #ifdef USE_BOOST
    1712template <typename Type>
    1813using SmallSet = boost::container::flat_set<Type>;
    19 #else
    20 template <typename Type>
    21 using SmallSet = std::unordered_set<Type>;
    22 #endif
    2314
    2415using ScopeSet = SmallSet<const PabloBlock *>;
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4990 r4991  
    7373    initializeLowerIntrinsicsPass(*Registry);
    7474
    75 //    llvm::PassManager pm;
    76 //    pm.add(createBasicAliasAnalysisPass());
    77 //    pm.add(createEarlyCSEPass());
    78 //    pm.add(createPromoteMemoryToRegisterPass());
    79 //    pm.add(createInstructionCombiningPass());
    80 //    pm.add(createConstantPropagationPass());
    81 //    pm.add(createDeadCodeEliminationPass());
    82 //    pm.run(*m);
    83 
    8475    std::string errMessage;
    8576    EngineBuilder builder(std::move(std::unique_ptr<Module>(m)));
Note: See TracChangeset for help on using the changeset viewer.