Ignore:
Timestamp:
Mar 25, 2016, 5:42:03 PM (3 years ago)
Author:
nmedfort
Message:

Symbol table work and untested kernel instatiate method for multiple input streams

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/casefold_pipeline.cpp

    r4988 r4991  
    141141    PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
    142142    remainingBytes->addIncoming(initialBufferSize, initialBlock);
    143     //iBuilder->CallPrintInt("remainingBytes", iBuilder->CreatePtrToInt(remainingBytes, iBuilder->getInt64Ty()));
    144143
    145144    Constant * const step = ConstantInt::get(int64ty, mBlockSize);
     
    169168   
    170169    iBuilder->SetInsertPoint(finalEmptyBlock);
    171     s2pInstance->clearOutputStream();
     170    s2pInstance->clearOutputStreamSet();
    172171    iBuilder->CreateBr(endBlock);
    173172   
  • icGREP/icgrep-devel/icgrep/kernels/instance.h

    r4986 r4991  
    4949    }
    5050
    51     void clearOutputStream(const unsigned streamOffset = 0) {
    52         mDefinition->clearOutputStream(mMemory, streamOffset);
     51    void clearOutputStreamSet(const unsigned streamOffset = 0) {
     52        mDefinition->clearOutputStreamSet(mMemory, streamOffset);
    5353    }
    5454
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r4988 r4991  
    88#include <IDISA/idisa_builder.h>
    99#include <kernels/instance.h>
     10#include <tuple>
     11#include <boost/functional/hash_fwd.hpp>
     12#include <unordered_map>
    1013
    1114using namespace llvm;
     
    5154unsigned KernelBuilder::addInternalState(llvm::Type * const type, std::string && name) {
    5255    if (LLVM_UNLIKELY(mInternalStateNameMap.count(name) != 0)) {
    53         throw std::runtime_error("Kernel already contains internal state " + name);
     56        throw std::runtime_error("Kernel already contains internal state '" + name + "'");
    5457    }
    5558    const unsigned index = addInternalState(type);
     
    121124Value * KernelBuilder::getInputStream(Value * const instance, const unsigned index, const unsigned streamOffset) {
    122125    assert (instance);
     126    assert (index < mInputStream.size());
    123127    Value * inputStream = iBuilder->CreateLoad(iBuilder->CreateGEP(instance,
    124128        {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET), iBuilder->getInt32(0)}));
     
    129133        offset = iBuilder->CreateAdd(offset, ConstantInt::get(offset->getType(), streamOffset));
    130134    }   
    131     offset = iBuilder->CreateCall(modFunction, offset, "offset");
    132     return iBuilder->CreateGEP(inputStream, { offset, iBuilder->getInt32(index) });
     135    return iBuilder->CreateGEP(inputStream, { iBuilder->CreateCall(modFunction, offset), iBuilder->getInt32(index) });
    133136}
    134137
     
    178181Value * KernelBuilder::getOutputStream(Value * const instance, const unsigned index, const unsigned streamOffset) {
    179182    assert (instance);
    180     Value * offset = getOffset(instance, streamOffset);
     183    Value * const offset = getOffset(instance, streamOffset);
    181184    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET), offset, iBuilder->getInt32(index)};
    182185    return iBuilder->CreateGEP(instance, indices);
     
    208211    FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), {PointerType::get(mKernelStateType, 0)}, false);
    209212    mDoBlock = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", mMod);
    210     mDoBlock->setCallingConv(CallingConv::C);
    211   //  mDoBlock->addAttribute(1, Attribute::NoCapture);
    212  //   mDoBlock->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
    213  //   mDoBlock->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
     213    mDoBlock->setCallingConv(CallingConv::C);   
     214    mDoBlock->setDoesNotCapture(1);
     215    mDoBlock->setDoesNotThrow();
    214216
    215217    Function::arg_iterator args = mDoBlock->arg_begin();
    216     mKernelParam = args++;
    217     mKernelParam->setName("this");
     218    mKernelState = args++;
     219    mKernelState->setName("this");
    218220
    219221    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mDoBlock, 0));
    220 
    221 //    mLocalBlockNo = iBuilder->CreateLoad(getBlockNo());
    222 //    Value * blockNo = iBuilder->CreateLoad(getBlockNo());
    223 //    iBuilder->CallPrintInt(mKernelName + "_BlockNo", blockNo);
    224 //    Value * modFunction = iBuilder->CreateLoad(iBuilder->CreateGEP(mKernelParam, {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET), iBuilder->getInt32(1)}));
    225 //    blockNo = iBuilder->CreateCall(modFunction, blockNo);
    226 //    iBuilder->CallPrintInt(mKernelName + "_Offset", blockNo);
    227 
    228222
    229223    return mDoBlock;
     
    242236    iBuilder->CreateRetVoid();
    243237
     238    eliminateRedundantMemoryOperations(mDoBlock);
     239
    244240    // Generate the zero initializer
    245241    PointerType * modFunctionType = PointerType::get(FunctionType::get(iBuilder->getInt64Ty(), {iBuilder->getInt64Ty()}, false), 0);
     
    248244    mConstructor = Function::Create(constructorType, GlobalValue::ExternalLinkage, mKernelName + "_Constructor", mMod);
    249245    mConstructor->setCallingConv(CallingConv::C);
    250     mConstructor->addAttribute(1, Attribute::NoCapture);
    251     //mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
    252    // mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
    253     //mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
     246    mDoBlock->setDoesNotCapture(1);
     247    mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
     248    mDoBlock->setDoesNotThrow();
     249
    254250    auto args = mConstructor->arg_begin();
    255     mKernelParam = args++;
    256     mKernelParam->setName("this");
     251    mKernelState = args++;
     252    mKernelState->setName("this");
    257253    Value * const inputStream = args++;
    258254    inputStream->setName("inputStream");
     
    272268    }
    273269
    274     Value * const input = iBuilder->CreateGEP(mKernelParam, {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET)});
     270    Value * const input = iBuilder->CreateGEP(mKernelState, {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET)});
    275271    iBuilder->CreateStore(inputStream, iBuilder->CreateGEP(input, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    276272    iBuilder->CreateStore(modFunction, iBuilder->CreateGEP(input, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     
    287283//        mStreamSetFunction->addAttribute(2, Attribute::NoCapture);
    288284//        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
    289 //        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
    290285//        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
    291286//        Value * offset = arg;
     
    308303
    309304/** ------------------------------------------------------------------------------------------------------------- *
     305 * @brief eliminateRedundantMemoryOperations
     306 ** ------------------------------------------------------------------------------------------------------------- */
     307inline void KernelBuilder::eliminateRedundantMemoryOperations(Function * const function) {
     308
     309
     310}
     311
     312/** ------------------------------------------------------------------------------------------------------------- *
    310313 * @brief instantiate
    311314 *
     
    327330Instance * KernelBuilder::instantiate(llvm::Value * const inputStream) {
    328331    AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
    329     Value * ptr = inputStream;
    330     iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(ptr, mInputStreamType), CreateModFunction(0));
     332    iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(inputStream, mInputStreamType), CreateModFunction(0));
     333    return new Instance(this, memory);
     334}
     335
     336/** ------------------------------------------------------------------------------------------------------------- *
     337 * @brief instantiate
     338 *
     339 * Generate a new instance of this kernel and call the default constructor to initialize it
     340 ** ------------------------------------------------------------------------------------------------------------- */
     341Instance * KernelBuilder::instantiate(std::initializer_list<llvm::Value *> inputStreams) {
     342    if (mInputStreamType->getStructNumElements() != inputStreams.size()) {
     343        throw std::runtime_error(mKernelName + ".instantiate expected " + std::to_string(inputStreams.size()) +
     344                                 "elements but was given " + std::to_string(mInputStreamType->getStructNumElements()));
     345    }
     346    AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
     347    AllocaInst * inputStruct = iBuilder->CreateAlloca(mInputStreamType, 0);
     348    unsigned i = 0;
     349    for (Value * inputStream : inputStreams) {
     350        Value * ptr = iBuilder->CreateGEP(inputStruct, { iBuilder->getInt32(0), iBuilder->getInt32(i++)});
     351        iBuilder->CreateStore(inputStream, ptr);
     352    }
     353    iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(inputStruct, mInputStreamType), CreateModFunction(0));
    331354    return new Instance(this, memory);
    332355}
     
    341364
    342365/** ------------------------------------------------------------------------------------------------------------- *
    343  * @brief clearOutputStream
    344  ** ------------------------------------------------------------------------------------------------------------- */
    345 void KernelBuilder::clearOutputStream(Value * const instance, const unsigned streamOffset) {
     366 * @brief clearOutputStreamSet
     367 *
     368 * Zero out the i + streamOffset stream set memory, where i is the current stream set indicated by the BlockNo.
     369 ** ------------------------------------------------------------------------------------------------------------- */
     370void KernelBuilder::clearOutputStreamSet(Value * const instance, const unsigned streamOffset) {
    346371    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET), getOffset(instance, streamOffset)};
    347     Value * ptr = iBuilder->CreateGEP(instance, indices, "ptr");
     372    Value * ptr = iBuilder->CreateGEP(instance, indices);
    348373    unsigned size = 0;
    349374    for (unsigned i = 0; i < mOutputStreamType->getStructNumElements(); ++i) {
     
    408433}
    409434
    410 /** ------------------------------------------------------------------------------------------------------------- *
    411  * @brief setLongestLookaheadAmount
    412  ** ------------------------------------------------------------------------------------------------------------- */
    413 void KernelBuilder::setLongestLookaheadAmount(const unsigned bits) {
    414     const unsigned blockWidth = iBuilder->getBitBlockWidth();
    415     const unsigned lookaheadBlocks = (bits + blockWidth - 1) / blockWidth;
    416     mBufferSize = (lookaheadBlocks + 1);
    417 }
    418 
    419435} // end of namespace kernel
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r4986 r4991  
    5959
    6060    inline llvm::Value * getInputStream(const unsigned index, const unsigned streamOffset = 0) {
    61         return getInputStream(mKernelParam, index, streamOffset);
     61        return getInputStream(mKernelState, index, streamOffset);
    6262    }
    6363
    6464    inline llvm::Value * getInputScalar(const unsigned index) {
    65         return getInputScalar(mKernelParam, index);
     65        return getInputScalar(mKernelState, index);
    6666    }
    6767
    6868    llvm::Value * getInternalState(const std::string & name) {
    69         return getInternalState(mKernelParam, name);
     69        return getInternalState(mKernelState, name);
    7070    }
    7171
    7272    void setInternalState(const std::string & name, llvm::Value * value) {
    73         setInternalState(mKernelParam, name, value);
     73        setInternalState(mKernelState, name, value);
    7474    }
    7575
    7676    llvm::Value * getInternalState(const unsigned index) {
    77         return getInternalState(mKernelParam, index);
     77        return getInternalState(mKernelState, index);
    7878    }
    7979
    8080    void setInternalState(const unsigned index, llvm::Value * value) {
    81         setInternalState(mKernelParam, index, value);
     81        setInternalState(mKernelState, index, value);
    8282    }
    8383
    8484    llvm::Value * getOutputStream(const unsigned index, const unsigned streamOffset = 0) {
    85         return getOutputStream(mKernelParam, index, streamOffset);
     85        return getOutputStream(mKernelState, index, streamOffset);
    8686    }
    8787
     
    9191
    9292    llvm::Value * getOutputScalar(const unsigned index) {
    93         return getOutputScalar(mKernelParam, index);
     93        return getOutputScalar(mKernelState, index);
    9494    }
    9595
     
    9999
    100100    llvm::Value * getBlockNo() {
    101         return getBlockNo(mKernelParam);
     101        return getBlockNo(mKernelState);
    102102    }
    103103
     
    114114    kernel::Instance * instantiate(llvm::Value * const inputStream);
    115115
    116     kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> &&inputStream);
     116    kernel::Instance * instantiate(std::initializer_list<llvm::Value *> inputStreams);
     117
     118    kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> && inputStream);
    117119
    118120    llvm::Type * getKernelStateType() const;
     
    122124    llvm::Function * getDoBlockFunction() const;
    123125
    124     void clearOutputStream(llvm::Value * const instance, const unsigned streamOffset = 0);
    125 
    126     void setLongestLookaheadAmount(const unsigned bits);
     126    void clearOutputStreamSet(llvm::Value * const instance, const unsigned streamOffset = 0);
    127127
    128128protected:
     
    153153
    154154    llvm::Function * CreateModFunction(const unsigned size);
     155
     156    void eliminateRedundantMemoryOperations(llvm::Function * const function);
    155157
    156158private:
     
    169171    llvm::Type *                        mOutputStreamType;
    170172
    171     llvm::Value *                       mInputParam;
    172     llvm::Value *                       mKernelParam;
     173    llvm::Value *                       mKernelState;
    173174    unsigned                            mBlockNoIndex;
    174175
     
    192193
    193194inline llvm::Value * KernelBuilder::getKernelState() const {
    194     return mKernelParam;
     195    return mKernelState;
    195196}
    196197
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r4986 r4991  
    104104    scanMatchInstance->setInternalState("FileName", fileName);
    105105
    106     // iBuilder->CallPrintInt("source", iBuilder->CreatePtrToInt(ptr, iBuilder->getInt64Ty()));
    107 
    108106    Value * initialBufferSize = nullptr;
    109107    BasicBlock * initialBlock = nullptr;
     
    165163
    166164    iBuilder->SetInsertPoint(finalEmptyBlock);
    167     s2pInstance->clearOutputStream();
     165    s2pInstance->clearOutputStreamSet();
    168166    iBuilder->CreateBr(endBlock);
    169167
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r4974 r4991  
    227227    Value * kernelStuctParam = kBuilder->getKernelState();
    228228
    229     Value * scanwordPos = iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState("BlockNo"));
     229    Value * scanwordPos = iBuilder->CreateLoad(kBuilder->getInternalState("BlockNo"));
    230230    scanwordPos = iBuilder->CreateMul(scanwordPos, ConstantInt::get(scanwordPos->getType(), iBuilder->getBitBlockWidth()));
    231231
  • icGREP/icgrep-devel/icgrep/kernels/symboltablepipeline.cpp

    r4986 r4991  
    1515#include <pablo/function.h>
    1616#include <pablo/pablo_compiler.h>
     17#include <pablo/analysis/pabloverifier.hpp>
    1718
    1819#include <re/re_cc.h>
     
    2627#include <pablo/printer_pablos.h>
    2728#include <iostream>
     29
     30#include <llvm/IR/Intrinsics.h>
    2831
    2932using namespace re;
     
    8588PabloFunction * SymbolTableBuilder::generateSortingFunction(const PabloFunction * const leading, const std::vector<unsigned> & endpoints) {
    8689    PabloFunction * const function = PabloFunction::Create("sorting", leading->getNumOfResults(), leading->getNumOfResults() * 2);
    87     PabloBlock * const entry = function->getEntryBlock();
     90    PabloBlock * entry = function->getEntryBlock();
    8891    function->setParameter(0, entry->createVar("S"));
    8992    function->setParameter(1, entry->createVar("E"));
     
    100103        PabloAST * S = entry->createAnd(L, R);
    101104        Assign * Si = entry->createAssign("S_" + std::to_string(i), S);
    102         R = entry->createXor(R, S);
    103105        PabloAST * F = entry->createScanThru(R, E);
    104106        Assign * Ei = entry->createAssign("E_" + std::to_string(i), F);
    105107        function->setResult(i * 2, Si);
    106108        function->setResult(i * 2 + 1, Ei);
     109        R = entry->createXor(R, S);
    107110        ++i;
    108111        lowerbound = endpoint;
     
    114117    function->setResult(i * 2 + 1, Ei);
    115118    mLongestLookahead = lowerbound;
     119
    116120    return function;
     121}
     122
     123/** ------------------------------------------------------------------------------------------------------------- *
     124 * @brief generateCountForwardZeroes
     125 ** ------------------------------------------------------------------------------------------------------------- */
     126inline Value * generateCountForwardZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
     127    Value * cttzFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::cttz, bits->getType());
     128    return iBuilder->CreateCall(cttzFunc, std::vector<Value *>({bits, ConstantInt::get(iBuilder->getInt1Ty(), 0)}));
     129}
     130
     131/** ------------------------------------------------------------------------------------------------------------- *
     132 * @brief generateGather
     133 ** ------------------------------------------------------------------------------------------------------------- */
     134inline Value * SymbolTableBuilder::generateGather(Value * const base, Value * const vindex) {
     135
     136    /*
     137        From Intel:
     138
     139        extern __m256i _mm256_i32gather_epi32(int const * base, __m256i vindex, const int scale)
     140
     141        From Clang avx2intrin.h:
     142
     143        #define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \
     144          (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \
     145                                               (int const *)(m),  \
     146                                               (__v8si)(__m256i)(i), \
     147                                               (__v8si)_mm256_set1_epi32(-1), (s)); })
     148
     149        From llvm IntrinsicsX86.td:
     150
     151        def llvm_ptr_ty        : LLVMPointerType<llvm_i8_ty>;             // i8*
     152
     153        def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
     154           Intrinsic<[llvm_v8i32_ty],
     155           [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
     156           [IntrReadArgMem]>;
     157
     158     */
     159
     160    VectorType * const vecType = VectorType::get(iBuilder->getInt32Ty(), 8);
     161    Function * vgather = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_avx2_gather_d_d_256);
     162    return iBuilder->CreateCall(vgather, {Constant::getAllOnesValue(vecType), base, iBuilder->CreateBitCast(vindex, vecType), Constant::getAllOnesValue(vecType), iBuilder->getInt8(1)});
     163}
     164
     165/** ------------------------------------------------------------------------------------------------------------- *
     166 * @brief generateMaskedGather
     167 ** ------------------------------------------------------------------------------------------------------------- */
     168inline Value * SymbolTableBuilder::generateMaskedGather(Value * const base, Value * const vindex, Value * const mask) {
     169
     170    /*
     171        From Intel:
     172
     173        extern __m256i _mm256_mask_i32gather_epi32(__m256i def_vals, int const * base, __m256i vindex, __m256i vmask, const int scale);
     174
     175        From Clang avx2intrin.h:
     176
     177        #define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
     178           (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
     179                                                (int const *)(m), \
     180                                                (__v8si)(__m256i)(i), \
     181                                                (__v8si)(__m256i)(mask), (s)); })
     182        From llvm IntrinsicsX86.td:
     183
     184        def llvm_ptr_ty        : LLVMPointerType<llvm_i8_ty>;             // i8*
     185
     186        def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
     187           Intrinsic<[llvm_v8i32_ty],
     188           [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
     189           [IntrReadArgMem]>;
     190
     191     */
     192
     193    VectorType * const vecType = VectorType::get(iBuilder->getInt32Ty(), 8);
     194    Function * vgather = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_avx2_gather_d_d_256);
     195    return iBuilder->CreateCall(vgather, {Constant::getNullValue(vecType), base, iBuilder->CreateBitCast(vindex, vecType), iBuilder->CreateBitCast(mask, vecType), iBuilder->getInt8(1)});
     196}
     197
     198/** ------------------------------------------------------------------------------------------------------------- *
     199 * @brief generateResetLowestBit
     200 ** ------------------------------------------------------------------------------------------------------------- */
     201inline Value * generateResetLowestBit(IDISA::IDISA_Builder * iBuilder, Value * bits) {
     202    Value * bits_minus1 = iBuilder->CreateSub(bits, ConstantInt::get(bits->getType(), 1));
     203    return iBuilder->CreateAnd(bits_minus1, bits);
     204}
     205
     206/** ------------------------------------------------------------------------------------------------------------- *
     207 * @brief generateScanMatch
     208 ** ------------------------------------------------------------------------------------------------------------- */
     209void SymbolTableBuilder::generateScannerKernel(KernelBuilder * kBuilder, const unsigned minKeyLength, const unsigned maxKeyLength, const unsigned scanWordBitWidth) {
     210
     211    Type * intScanWordTy = iBuilder->getIntNTy(scanWordBitWidth);
     212    const unsigned fieldCount = iBuilder->getBitBlockWidth() / scanWordBitWidth;
     213    Type * scanWordVectorType =  VectorType::get(intScanWordTy, fieldCount);
     214    const unsigned vectorWidth = iBuilder->getBitBlockWidth() / 32;
     215    Type * gatherVectorType =  VectorType::get(iBuilder->getInt32Ty(), vectorWidth);
     216
     217    const unsigned baseIdx = kBuilder->addInternalState(iBuilder->getInt8PtrTy(), "Base");
     218    const unsigned startIndexIdx = kBuilder->addInternalState(iBuilder->getInt32Ty(), "StartIndex");
     219    const unsigned startArrayIdx = kBuilder->addInternalState(ArrayType::get(iBuilder->getInt32Ty(), iBuilder->getBitBlockWidth() + vectorWidth), "StartArray");
     220    const unsigned endIndexIdx = kBuilder->addInternalState(iBuilder->getInt32Ty(), "EndIndex");
     221    const unsigned endArrayIdx = kBuilder->addInternalState(gatherVectorType, "EndArray");
     222
     223    kBuilder->addInputStream(1, "startStream");
     224    kBuilder->addInputStream(1, "endStream");
     225
     226    Function * function = kBuilder->prepareFunction();
     227
     228    BasicBlock * const entry = iBuilder->GetInsertBlock();
     229
     230    BasicBlock * startOuterCond = BasicBlock::Create(mMod->getContext(), "startOuterCond", function, 0);
     231    BasicBlock * startOuterBody = BasicBlock::Create(mMod->getContext(), "startOuterBody", function, 0);
     232    BasicBlock * startInnerCond = BasicBlock::Create(mMod->getContext(), "startInnerCond", function, 0);
     233    BasicBlock * startInnerBody = BasicBlock::Create(mMod->getContext(), "startInnerBody", function, 0);
     234
     235    BasicBlock * endOuterCond = BasicBlock::Create(mMod->getContext(), "endOuterCond", function, 0);
     236    BasicBlock * endOuterBody = BasicBlock::Create(mMod->getContext(), "endOuterBody", function, 0);
     237    BasicBlock * endInnerCond = BasicBlock::Create(mMod->getContext(), "endInnerCond", function, 0);
     238    BasicBlock * endInnerBody = BasicBlock::Create(mMod->getContext(), "endInnerBody", function, 0);
     239
     240    BasicBlock * gatherInit = BasicBlock::Create(mMod->getContext(), "gatherInit", function, 0);
     241
     242    BasicBlock * gatherFullCond = BasicBlock::Create(mMod->getContext(), "gatherFullCond", function, 0);
     243    BasicBlock * gatherFullBody = BasicBlock::Create(mMod->getContext(), "gatherFullBody", function, 0);
     244
     245//    BasicBlock * gatherPartialCond = BasicBlock::Create(mMod->getContext(), "gatherPartialCond", function, 0);
     246//    BasicBlock * gatherPartialBody = BasicBlock::Create(mMod->getContext(), "gatherPartialBody", function, 0);
     247
     248    BasicBlock * exit = BasicBlock::Create(mMod->getContext(), "exit", function, 0);
     249
     250    //TODO: this won't work on files > 2^32 bytes yet; needs an intermediate flush then a recalculation of the base pointer.
     251    Value * const base = iBuilder->CreateLoad(kBuilder->getInternalState(baseIdx), "base");
     252    Value * blockPos = iBuilder->CreateLoad(kBuilder->getBlockNo());
     253    blockPos = iBuilder->CreateMul(blockPos, iBuilder->getInt64(iBuilder->getBitBlockWidth()));
     254
     255    // if two positions cannot be in the same vector element, we could possibly do some work in parallel here.
     256    Value * startIndex = iBuilder->CreateLoad(kBuilder->getInternalState(startIndexIdx), "startIndex");
     257    Value * startArray = kBuilder->getInternalState(startArrayIdx);
     258    Value * startStream = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(0)), scanWordVectorType, "startStream");
     259
     260    Value * endIndex = iBuilder->CreateLoad(kBuilder->getInternalState(endIndexIdx), "endIndex");
     261    Value * endArray = kBuilder->getInternalState(endArrayIdx);
     262    Value * endStream = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(1)), scanWordVectorType, "endStream");
     263
     264    iBuilder->CreateBr(startOuterCond);
     265    iBuilder->SetInsertPoint(startOuterCond);
     266
     267    PHINode * startIV = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
     268    startIV->addIncoming(iBuilder->getInt64(0), entry);
     269    Value * startOuterTest = iBuilder->CreateICmpNE(startIV, iBuilder->getInt64(fieldCount));
     270    iBuilder->CreateCondBr(startOuterTest, startOuterBody, endOuterCond);
     271
     272    iBuilder->SetInsertPoint(startOuterBody);
     273    Value * startField = iBuilder->CreateExtractElement(startStream, startIV);
     274    startIV->addIncoming(iBuilder->CreateAdd(startIV, iBuilder->getInt64(1)), startInnerCond);
     275    iBuilder->CreateBr(startInnerCond);
     276
     277    iBuilder->SetInsertPoint(startInnerCond);
     278    PHINode * startIndexPhi = iBuilder->CreatePHI(startIndex->getType(), 2);
     279    startIndexPhi->addIncoming(startIndex, startOuterBody);
     280    PHINode * startFieldPhi = iBuilder->CreatePHI(intScanWordTy, 2);
     281    startFieldPhi->addIncoming(startField, startOuterBody);
     282    Value * test = iBuilder->CreateICmpNE(startFieldPhi, ConstantInt::getNullValue(intScanWordTy));
     283    iBuilder->CreateCondBr(test, startInnerBody, startOuterCond);
     284
     285    iBuilder->SetInsertPoint(startInnerBody);
     286    Value * startPos = generateCountForwardZeroes(iBuilder, startFieldPhi);
     287    startFieldPhi->addIncoming(generateResetLowestBit(iBuilder, startFieldPhi), startInnerBody);
     288    startPos = iBuilder->CreateTruncOrBitCast(iBuilder->CreateOr(startPos, blockPos), iBuilder->getInt32Ty());
     289    iBuilder->CreateStore(startPos, iBuilder->CreateGEP(startArray, {iBuilder->getInt32(0), startIndexPhi}));
     290    startIndexPhi->addIncoming(iBuilder->CreateAdd(startIndexPhi, ConstantInt::get(startIndexPhi->getType(), 1)), startInnerBody);
     291    iBuilder->CreateBr(startInnerCond);
     292    // END POINT OUTER COND
     293    iBuilder->SetInsertPoint(endOuterCond);
     294    PHINode * endIV = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
     295    endIV->addIncoming(iBuilder->getInt64(0), startOuterCond);
     296    Value * endOuterTest = iBuilder->CreateICmpNE(endIV, iBuilder->getInt64(fieldCount));
     297    iBuilder->CreateCondBr(endOuterTest, endOuterBody, exit);
     298    // END POINT OUTER BODY
     299    iBuilder->SetInsertPoint(endOuterBody);
     300    Value * endField = iBuilder->CreateExtractElement(endStream, endIV);
     301    endIV->addIncoming(iBuilder->CreateAdd(endIV, iBuilder->getInt64(1)), endInnerCond);
     302    iBuilder->CreateBr(endInnerCond);
     303    // END POINT INNER COND
     304    iBuilder->SetInsertPoint(endInnerCond);
     305    PHINode * endIndexPhi = iBuilder->CreatePHI(endIndex->getType(), 3);
     306    endIndexPhi->addIncoming(endIndex, endOuterBody);
     307    PHINode * endFieldPhi = iBuilder->CreatePHI(intScanWordTy, 3);
     308    endFieldPhi->addIncoming(endField, endOuterBody);
     309    Value * endInnerTest = iBuilder->CreateICmpNE(endFieldPhi, ConstantInt::getNullValue(intScanWordTy));
     310    iBuilder->CreateCondBr(endInnerTest, endInnerBody, endOuterCond);
     311    // END POINT INNER BODY
     312    iBuilder->SetInsertPoint(endInnerBody);
     313    Value * endPos = generateCountForwardZeroes(iBuilder, endFieldPhi);
     314    Value * updatedEndFieldPhi = generateResetLowestBit(iBuilder, endFieldPhi);
     315    endFieldPhi->addIncoming(updatedEndFieldPhi, endInnerBody);
     316    endPos = iBuilder->CreateTruncOrBitCast(iBuilder->CreateOr(endPos, blockPos), iBuilder->getInt32Ty());
     317    iBuilder->CreateStore(endPos, iBuilder->CreateGEP(endArray, {iBuilder->getInt32(0), endIndexPhi}));
     318    Value * updatedEndIndexPhi = iBuilder->CreateAdd(endIndexPhi, ConstantInt::get(endIndexPhi->getType(), 1));
     319    endIndexPhi->addIncoming(updatedEndIndexPhi, endInnerBody);
     320    Value * filledEndPosBufferTest = iBuilder->CreateICmpEQ(updatedEndIndexPhi, ConstantInt::get(updatedEndIndexPhi->getType(), vectorWidth));
     321    iBuilder->CreateCondBr(filledEndPosBufferTest, gatherInit, endInnerCond);
     322    // GATHER INIT
     323    iBuilder->SetInsertPoint(gatherInit);
     324    Value * rawTokenBuffer = iBuilder->CreateAlloca(ArrayType::get(gatherVectorType, (maxKeyLength / 4) + (maxKeyLength % 4) != 0 ? 1 : 0));
     325    rawTokenBuffer = iBuilder->CreatePointerCast(rawTokenBuffer, PointerType::get(gatherVectorType, 0));
     326    Value * const startPositions = iBuilder->CreateAlignedLoad(iBuilder->CreatePointerCast(startArray, PointerType::get(gatherVectorType, 0)), 4);
     327    iBuilder->CreateBr(gatherFullCond);
     328    // GATHER FULL COND
     329    iBuilder->SetInsertPoint(gatherFullCond);
     330
     331    endIndexPhi->addIncoming(iBuilder->getInt32(0), gatherFullCond);
     332    endFieldPhi->addIncoming(updatedEndFieldPhi, gatherFullCond);
     333
     334    PHINode * fullGatherIV = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
     335    fullGatherIV->addIncoming(iBuilder->getInt64(0), gatherInit);
     336    PHINode * startPositionsPhi = iBuilder->CreatePHI(startPositions->getType(), 2);
     337    startPositionsPhi->addIncoming(startPositions, gatherInit);
     338
     339    Value * fullGatherTest = iBuilder->CreateICmpNE(fullGatherIV, iBuilder->getInt64(minKeyLength / vectorWidth));
     340    iBuilder->CreateCondBr(fullGatherTest, gatherFullBody, endInnerCond);
     341    // GATHER FULL BODY
     342    iBuilder->SetInsertPoint(gatherFullBody);
     343    Value * gathered = generateGather(base, startPositionsPhi);
     344    startPositionsPhi->addIncoming(iBuilder->CreateAdd(startPositionsPhi, iBuilder->CreateVectorSplat(vectorWidth, iBuilder->getInt32(4))), gatherFullBody);
     345    iBuilder->CreateAlignedStore(gathered, iBuilder->CreateGEP(rawTokenBuffer, fullGatherIV), 4);
     346    fullGatherIV->addIncoming(iBuilder->CreateAdd(fullGatherIV, iBuilder->getInt64(1)), gatherFullBody);
     347    iBuilder->CreateBr(gatherFullCond);
     348
     349    iBuilder->SetInsertPoint(exit);
     350    // need to save the start/end index still
     351    kBuilder->finalize();
    117352}
    118353
     
    133368    PabloFunction * const sorting = generateSortingFunction(leading, endpoints);
    134369
    135     mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder);
    136     mLeadingKernel = new KernelBuilder("leading", mMod, iBuilder);
    137     mSortingKernel = new KernelBuilder("sorting", mMod, iBuilder);
    138 
    139     mLeadingKernel->setLongestLookaheadAmount(mLongestLookahead);
    140     mSortingKernel->setLongestLookaheadAmount(mLongestLookahead);
     370    const auto bufferSize = ((mLongestLookahead + iBuilder->getBitBlockWidth() - 1) / iBuilder->getBitBlockWidth()) + 1;
     371
     372    mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder, 1);
     373    mLeadingKernel = new KernelBuilder("leading", mMod, iBuilder, bufferSize);
     374    mSortingKernel = new KernelBuilder("sorting", mMod, iBuilder, bufferSize);
     375    mScannerKernel = new KernelBuilder("scanner", mMod, iBuilder, 1);
    141376
    142377    generateS2PKernel(mMod, iBuilder, mS2PKernel);
     
    151386
    152387    releaseSlabAllocatorMemory();
     388
     389    generateScannerKernel(mScannerKernel, 1, 1, 64);
     390
    153391}
    154392
     
    163401
    164402    Value * const inputStream = args++;
    165     inputStream->setName("input");
     403    inputStream->setName("inputStream");
    166404
    167405    Value * const bufferSize = args++;
    168     bufferSize->setName("buffersize");
     406    bufferSize->setName("bufferSize");
    169407
    170408    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
     
    173411
    174412    BasicBlock * leadingTestBlock = BasicBlock::Create(mMod->getContext(), "leadingCond", main, 0);
     413    BasicBlock * safetyCheckBlock = BasicBlock::Create(mMod->getContext(), "safetyCheck", main, 0);
    175414    BasicBlock * leadingBodyBlock = BasicBlock::Create(mMod->getContext(), "leadingBody", main, 0);
    176415
     
    211450    remainingBytes->addIncoming(bufferSize, entryBlock);
    212451    Value * leadingBlocksCond = iBuilder->CreateICmpULT(blockNo, iBuilder->getInt64(leadingBlocks));
    213     iBuilder->CreateCondBr(leadingBlocksCond, leadingBodyBlock, regularTestBlock);
     452    iBuilder->CreateCondBr(leadingBlocksCond, safetyCheckBlock, regularTestBlock);
     453
     454    iBuilder->SetInsertPoint(safetyCheckBlock);
     455    Value * safetyCheckCond = iBuilder->CreateICmpULT(remainingBytes, blockSize);
     456    iBuilder->CreateCondBr(safetyCheckCond, regularExitBlock, leadingBodyBlock);
     457
    214458    iBuilder->SetInsertPoint(leadingBodyBlock);
    215459    s2pInstance->CreateDoBlockCall();
     
    221465    // Now all the data for which we can produce and consume a full leading block...
    222466    iBuilder->SetInsertPoint(regularTestBlock);
    223     PHINode * blockNo2 = iBuilder->CreatePHI(intType, 2);
    224     blockNo2->addIncoming(blockNo, leadingTestBlock);
    225467    PHINode * remainingBytes2 = iBuilder->CreatePHI(intType, 2);
    226468    remainingBytes2->addIncoming(remainingBytes, leadingTestBlock);
    227     Value * remainingBytesCond = iBuilder->CreateICmpUGE(remainingBytes2, requiredBytes);
    228     iBuilder->CreateCondBr(remainingBytesCond, regularBodyBlock, regularExitBlock);
     469    Value * remainingBytesCond = iBuilder->CreateICmpULT(remainingBytes2, requiredBytes);
     470    iBuilder->CreateCondBr(remainingBytesCond, regularExitBlock, regularBodyBlock);
    229471    iBuilder->SetInsertPoint(regularBodyBlock);
    230472    s2pInstance->CreateDoBlockCall();
    231473    leadingInstance->CreateDoBlockCall();
    232474    sortingInstance->CreateDoBlockCall();
    233     blockNo2->addIncoming(iBuilder->CreateAdd(blockNo2, iBuilder->getInt64(1)), regularBodyBlock);
    234475    remainingBytes2->addIncoming(iBuilder->CreateSub(remainingBytes2, blockSize), regularBodyBlock);
    235476    iBuilder->CreateBr(regularTestBlock);
    236477
    237 
    238478    // Check if we have a partial blocks worth of leading data remaining
    239479    iBuilder->SetInsertPoint(regularExitBlock);
    240     Value * partialBlockCond = iBuilder->CreateICmpUGT(remainingBytes2, ConstantInt::getNullValue(intType));
    241     iBuilder->CreateCondBr(partialBlockCond, partialBlock, finalTestBlock);
     480    PHINode * remainingBytes3 = iBuilder->CreatePHI(intType, 2);
     481    remainingBytes3->addIncoming(remainingBytes, safetyCheckBlock);
     482    remainingBytes3->addIncoming(remainingBytes2, regularTestBlock);
     483    Value * partialBlockCond = iBuilder->CreateICmpNE(remainingBytes3, ConstantInt::getNullValue(intType));
     484    iBuilder->CreateCondBr(partialBlockCond, finalTestBlock, partialBlock);
    242485
    243486    // If we do, process it and mask out the data
    244487    iBuilder->SetInsertPoint(partialBlock);
    245488    s2pInstance->CreateDoBlockCall();
    246     Value * partialLeadingData[2];
    247     for (unsigned i = 0; i < 2; ++i) {
    248         partialLeadingData[i] = leadingInstance->getOutputStream(i);
    249     }
    250489    leadingInstance->CreateDoBlockCall();
    251     Type * fullBitBlockType = iBuilder->getIntNTy(mBlockSize);
    252     Value * remaining = iBuilder->CreateZExt(iBuilder->CreateSub(blockSize, remainingBytes2), fullBitBlockType);
    253     Value * eofMask = iBuilder->CreateLShr(ConstantInt::getAllOnesValue(fullBitBlockType), remaining);
    254     eofMask = iBuilder->CreateBitCast(eofMask, mBitBlockType);
    255     for (unsigned i = 0; i < 2; ++i) {
    256         Value * value = iBuilder->CreateAnd(iBuilder->CreateBlockAlignedLoad(partialLeadingData[i]), eofMask);
    257         iBuilder->CreateBlockAlignedStore(value, partialLeadingData[i]);
    258     }
    259     for (unsigned i = 0; i < 2; ++i) {
    260         iBuilder->CreateBlockAlignedStore(ConstantInt::getNullValue(mBitBlockType), leadingInstance->getOutputStream(i));
    261     }
     490    leadingInstance->clearOutputStreamSet();
    262491    sortingInstance->CreateDoBlockCall();
    263492    iBuilder->CreateBr(finalTestBlock);
     
    272501
    273502    iBuilder->SetInsertPoint(finalBodyBlock);
    274     for (unsigned i = 0; i < 2; ++i) {
    275         iBuilder->CreateBlockAlignedStore(ConstantInt::getNullValue(mBitBlockType), leadingInstance->getOutputStream(i));
    276     }
    277     Value * blockNoPtr = leadingInstance->getBlockNo();
    278     Value * blockNoValue = iBuilder->CreateLoad(blockNoPtr);
    279     blockNoValue = iBuilder->CreateAdd(blockNoValue, ConstantInt::get(blockNoValue->getType(), 1));
    280     iBuilder->CreateStore(blockNoValue, blockNoPtr);
    281 
     503    leadingInstance->clearOutputStreamSet();
    282504    sortingInstance->CreateDoBlockCall();
    283 
    284505    remainingFullBlocks->addIncoming(iBuilder->CreateSub(remainingFullBlocks, iBuilder->getInt64(1)), finalBodyBlock);
    285506
    286507    iBuilder->CreateBr(finalTestBlock);
    287 
    288508    iBuilder->SetInsertPoint(exitBlock);
    289509    iBuilder->CreateRetVoid();
    290 
    291     main->dump();
    292510
    293511    return main;
     
    298516    delete mLeadingKernel;
    299517    delete mSortingKernel;
    300 }
    301 
    302 }
     518    delete mScannerKernel;
     519}
     520
     521
     522}
  • icGREP/icgrep-devel/icgrep/kernels/symboltablepipeline.h

    r4974 r4991  
    3232    pablo::PabloFunction * generateSortingFunction(const pablo::PabloFunction * const leading, const std::vector<unsigned> & endpoints);
    3333
     34    void generateScannerKernel(KernelBuilder * kBuilder, const unsigned minKeyLength, const unsigned maxKeyLength, const unsigned scanWordBitWidth = 64);
     35    Function * generateScanWordRoutine(KernelBuilder * const kBuilder, const unsigned scanWordBitWidth);
     36
     37    Value * generateGather(Value * const base, Value * const vindex);
     38    Value * generateMaskedGather(Value * const base, Value * const vindex, Value * const mask);
     39
    3440    void generateLLVMParser();
    3541
     
    4046    KernelBuilder *                     mLeadingKernel;
    4147    KernelBuilder *                     mSortingKernel;
     48    KernelBuilder *                     mScannerKernel;
     49
    4250    unsigned                            mLongestLookahead;
    4351    llvm::Type *                        mBitBlockType;
Note: See TracChangeset for help on using the changeset viewer.