Changeset 6261


Ignore:
Timestamp:
Dec 31, 2018, 5:25:19 PM (3 months ago)
Author:
nmedfort
Message:

Work on OptimizationBranch; revisited pipeline termination

Location:
icGREP/icgrep-devel/icgrep
Files:
72 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r6249 r6261  
    1717#include <toolchain/toolchain.h>
    1818#include <toolchain/driver.h>
    19 //#include <thread>
    2019#include <stdlib.h>
    2120#include <sys/mman.h>
     
    691690LoadInst * CBuilder::CreateAtomicLoadAcquire(Value * ptr) {
    692691    const auto alignment = ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits() / 8;
    693     LoadInst * inst = CreateAlignedLoad(ptr, alignment, true);
     692    LoadInst * inst = CreateAlignedLoad(ptr, alignment, false);
    694693    inst->setOrdering(AtomicOrdering::Acquire);
    695694    return inst;
     
    698697StoreInst * CBuilder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
    699698    const auto alignment = ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits() / 8;
    700     StoreInst * inst = CreateAlignedStore(val, ptr, alignment, true);
     699    StoreInst * inst = CreateAlignedStore(val, ptr, alignment, false);
    701700    inst->setOrdering(AtomicOrdering::Release);
    702701    return inst;
     
    793792    Function * pthreadCreateFunc = m->getFunction("pthread_create");
    794793    if (pthreadCreateFunc == nullptr) {
    795         Type * pthreadTy = getSizeTy();
     794        Type * const pthreadTy = TypeBuilder<pthread_t, false>::get(getContext());
    796795        FunctionType * funVoidPtrVoidTy = FunctionType::get(getVoidTy(), {voidPtrTy}, false);
    797796        FunctionType * fty = FunctionType::get(getInt32Ty(), {pthreadTy->getPointerTo(), voidPtrTy, funVoidPtrVoidTy->getPointerTo(), voidPtrTy}, false);
     
    799798        pthreadCreateFunc->setCallingConv(CallingConv::C);
    800799    }
    801     assert (thread->getType()->isPointerTy());
    802800    return CreateCall(pthreadCreateFunc, {thread, attr, start_routine, CreatePointerCast(arg, voidPtrTy)});
    803801}
     
    832830    Function * pthreadJoinFunc = m->getFunction("pthread_join");
    833831    if (pthreadJoinFunc == nullptr) {
    834         FunctionType * fty = FunctionType::get(getInt32Ty(), {getSizeTy(), getVoidPtrTy()->getPointerTo()}, false);
     832        Type * const pthreadTy = TypeBuilder<pthread_t, false>::get(getContext());
     833        FunctionType * fty = FunctionType::get(getInt32Ty(), {pthreadTy, getVoidPtrTy()->getPointerTo()}, false);
    835834        pthreadJoinFunc = Function::Create(fty, Function::ExternalLinkage, "pthread_join", m);
    836835        pthreadJoinFunc->setCallingConv(CallingConv::C);
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r6249 r6261  
    163163#warning make a "CBuffer" class to abstract away the complexity of making these function typedefs.
    164164
    165 typedef void (*preprocessFunctionType)(char * output_data, size_t output_produced, size_t output_size, const uint32_t fd);
     165typedef void (*preprocessFunctionType)(char * output_data, size_t & output_produced, size_t output_size, const uint32_t fd);
    166166
    167167static char * chStream;
     
    242242    const auto n = round_up_to(size, 8 * ALIGNMENT);
    243243    chStream = alloc.allocate((4 * n) / 8);
    244     preprocess(chStream, 0, n, fd);
     244    size_t length;
     245    preprocess(chStream, length, n, fd);
    245246    close(fd);
    246247    return chStream;
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.cpp

    r6184 r6261  
    5959        Value * pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
    6060        pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
    61        
     61
    6262        e[0][0] = pattStream;
    6363        for(unsigned i = 1; i < mPatternLen; i++){
     
    9999                               Scalar * const pattStream,
    100100                               StreamSet * const CCStream, StreamSet * const ResultStream)
    101 : BlockOrientedKernel("editd_cpu" + std::to_string(patternLen) + "x" + std::to_string(groupSize),
     101: BlockOrientedKernel(b, "editd_cpu" + std::to_string(patternLen) + "x" + std::to_string(groupSize),
    102102// input stream
    103103{Binding{"CCStream", CCStream}},
  • icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.cpp

    r5985 r6261  
    3131
    3232    IntegerType * const int32ty = idb->getInt32Ty();
    33     IntegerType * const int8ty = idb->getInt8Ty(); 
     33    IntegerType * const int8ty = idb->getInt8Ty();
    3434    Value * groupLen = idb->getInt32((mPatternLen + 1) * mGroupSize);
    3535    Value * pattPos = idb->getInt32(0);
    3636    Value * pattBuf = idb->getScalarField("pattStream");
    3737    Value * strideCarryArr = idb->getScalarField("strideCarry");
    38    
     38
    3939    unsigned carryIdx = 0;
    4040
     
    9898
    9999editdGPUKernel::editdGPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen, unsigned groupSize) :
    100 BlockOrientedKernel("editd_gpu",
     100BlockOrientedKernel(b, "editd_gpu",
    101101              {Binding{b->getStreamSetTy(4), "CCStream"}},
    102102              {Binding{b->getStreamSetTy(dist + 1), "ResultStream"}},
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.cpp

    r6184 r6261  
    2323    Value * blockNo = b->getScalarField("BlockNo");
    2424    Value * scanwordPos = b->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), b->getBitBlockWidth()));
    25    
     25
    2626    std::vector<Value * > matchWordVectors;
    2727    for(unsigned d = 0; d < mNumElements; d++) {
     
    2929        matchWordVectors.push_back(b->CreateBitCast(matches, scanwordVectorType));
    3030    }
    31    
     31
    3232    for(unsigned i = 0; i < fieldCount; ++i) {
    3333        for(unsigned d = 0; d < mNumElements; d++) {
     
    8787
    8888editdScanKernel::editdScanKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * matchResults) :
    89 BlockOrientedKernel("editdScanMatch" + std::to_string(matchResults->getNumElements()),
     89BlockOrientedKernel(b, "editdScanMatch" + std::to_string(matchResults->getNumElements()),
    9090              {Binding{"matchResults", matchResults}},
    9191              {}, {}, {}, {Binding{b->getSizeTy(), "BlockNo"}}),
  • icGREP/icgrep-devel/icgrep/idisa_test.cpp

    r6253 r6261  
    5656
    5757ShiftLimitKernel::ShiftLimitKernel(const std::unique_ptr<KernelBuilder> & b, unsigned fw, unsigned limit, StreamSet * input, StreamSet * output)
    58 : BlockOrientedKernel("shiftLimit" + std::to_string(fw) + "_" + std::to_string(limit),
     58: BlockOrientedKernel(b, "shiftLimit" + std::to_string(fw) + "_" + std::to_string(limit),
    5959                              {Binding{"shiftOperand", input}},
    6060                              {Binding{"limitedShift", output}},
     
    7373class IdisaBinaryOpTestKernel : public MultiBlockKernel {
    7474public:
    75     IdisaBinaryOpTestKernel(const std::unique_ptr<KernelBuilder> &, std::string idisa_op, unsigned fw, unsigned imm,
     75    IdisaBinaryOpTestKernel(const std::unique_ptr<KernelBuilder> &b, std::string idisa_op, unsigned fw, unsigned imm,
    7676                            StreamSet * Operand1, StreamSet * Operand2, StreamSet * result);
    7777    bool isCachable() const override { return true; }
     
    8585};
    8686
    87 IdisaBinaryOpTestKernel::IdisaBinaryOpTestKernel(const std::unique_ptr<KernelBuilder> & /* b */, std::string idisa_op, unsigned fw, unsigned imm,
     87IdisaBinaryOpTestKernel::IdisaBinaryOpTestKernel(const std::unique_ptr<KernelBuilder> & b, std::string idisa_op, unsigned fw, unsigned imm,
    8888                                                 StreamSet * Operand1, StreamSet * Operand2, StreamSet * result)
    89 : MultiBlockKernel(idisa_op + std::to_string(fw) + "_test",
     89: MultiBlockKernel(b, idisa_op + std::to_string(fw) + "_test",
    9090     {Binding{"operand1", Operand1}, Binding{"operand2", Operand2}},
    9191     {Binding{"result", result}},
     
    181181};
    182182
    183 IdisaBinaryOpCheckKernel::IdisaBinaryOpCheckKernel(const std::unique_ptr<KernelBuilder> & /* b */, std::string idisa_op, unsigned fw, unsigned imm,
     183IdisaBinaryOpCheckKernel::IdisaBinaryOpCheckKernel(const std::unique_ptr<KernelBuilder> & b, std::string idisa_op, unsigned fw, unsigned imm,
    184184                                                   StreamSet * Operand1, StreamSet * Operand2, StreamSet * result,
    185185                                                   StreamSet * expected, Scalar * failures)
    186 : BlockOrientedKernel(idisa_op + std::to_string(fw) + "_check" + std::to_string(QuietMode),
     186: BlockOrientedKernel(b, idisa_op + std::to_string(fw) + "_check" + std::to_string(QuietMode),
    187187                           {Binding{"operand1", Operand1},
    188188                            Binding{"operand2", Operand2},
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp

    r6184 r6261  
    4747    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
    4848    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
    49    
     49
    5050    For example, consider input bitstream 101. Our desired output is:
    5151    00110001 00101110 00110001
     
    6060    0   1   0 -> opposite
    6161    0   1   0 -> opposite
    62     1   0   1 -> same as 4th bit position. 
    63    
     62    1   0   1 -> same as 4th bit position.
     63
    6464    Armed with the above we can do the bit->byte conversion all at once
    6565    rather than byte at a time! That's what we do below.
     
    7575    bits[6] = negBitStrmVal;
    7676    bits[7] = bitStrmVal;
    77    
     77
    7878    // Reassemble the parallel bit streams into a byte stream
    7979    Value * printableBytes[8];
    8080    p2s(iBuilder, bits, printableBytes);
    81    
     81
    8282    for (unsigned j = 0; j < 8; ++j) {
    8383        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
     
    8888    if (mStreamIndex >= mSizeInputStreamSet)
    8989        llvm::report_fatal_error("Stream index out of bounds.\n");
    90    
     90
    9191    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
    9292
     
    104104        }
    105105    }
    106    
     106
    107107}
    108108
     
    269269
    270270PrintableBits::PrintableBits(const std::unique_ptr<kernel::KernelBuilder> & builder)
    271 : BlockOrientedKernel("PrintableBits", {Binding{builder->getStreamSetTy(1), "bitStream"}}, {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {}, {}, {}) {
     271: BlockOrientedKernel(b, "PrintableBits", {Binding{builder->getStreamSetTy(1), "bitStream"}}, {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {}, {}, {}) {
    272272
    273273}
    274274
    275275SelectStream::SelectStream(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned streamIndex)
    276 : BlockOrientedKernel("SelectStream", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(1, 1), "bitStream"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mStreamIndex(streamIndex) {
     276: BlockOrientedKernel(b, "SelectStream", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(1, 1), "bitStream"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mStreamIndex(streamIndex) {
    277277
    278278}
    279279
    280280ExpandOrSelectStreams::ExpandOrSelectStreams(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned sizeOutputStreamSet)
    281 : BlockOrientedKernel("ExpandOrSelectStreams", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(sizeOutputStreamSet), "outputbitStreams"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mSizeOutputStreamSet(sizeOutputStreamSet) {
     281: BlockOrientedKernel(b, "ExpandOrSelectStreams", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(sizeOutputStreamSet), "outputbitStreams"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mSizeOutputStreamSet(sizeOutputStreamSet) {
    282282
    283283}
    284284
    285285PrintStreamSet::PrintStreamSet(const std::unique_ptr<kernel::KernelBuilder> & builder, std::vector<std::string> && names, const unsigned minWidth)
    286 : BlockOrientedKernel("PrintableStreamSet", {}, {}, {}, {}, {})
     286: BlockOrientedKernel(b, "PrintableStreamSet", {}, {}, {}, {}, {})
    287287, mNames(names)
    288288, mNameWidth(0) {
  • icGREP/icgrep-devel/icgrep/kernels/bitstream_gather_pdep_kernel.cpp

    r6184 r6261  
    1111
    1212    BitStreamGatherPDEPKernel::BitStreamGatherPDEPKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned numberOfStream, std::string name)
    13             : MultiBlockKernel(std::move(name),
     13            : MultiBlockKernel(b, std::move(name),
    1414// input stream sets
    1515                               {Binding{b->getStreamSetTy(), "marker", FixedRate(), Principal()},
  • icGREP/icgrep-devel/icgrep/kernels/bitstream_pdep_kernel.cpp

    r6066 r6261  
    1515
    1616    BitStreamPDEPKernel::BitStreamPDEPKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned numberOfStream, std::string name)
    17             : MultiBlockKernel(std::move(name),
     17            : MultiBlockKernel(b, std::move(name),
    1818// input stream sets
    1919                               {Binding{b->getStreamSetTy(), "marker", FixedRate(), Principal()},
  • icGREP/icgrep-devel/icgrep/kernels/block_kernel.cpp

    r6253 r6261  
    406406
    407407// CONSTRUCTOR
    408 BlockOrientedKernel::BlockOrientedKernel(std::string && kernelName,
    409                                          Bindings && stream_inputs,
    410                                          Bindings && stream_outputs,
    411                                          Bindings && scalar_parameters,
    412                                          Bindings && scalar_outputs,
    413                                          Bindings && internal_scalars)
    414 : MultiBlockKernel(TypeId::BlockOriented,
    415                    std::move(kernelName),
    416                    std::move(stream_inputs),
    417                    std::move(stream_outputs),
    418                    std::move(scalar_parameters),
    419                    std::move(scalar_outputs),
    420                    std::move(internal_scalars))
     408BlockOrientedKernel::BlockOrientedKernel(
     409    const std::unique_ptr<KernelBuilder> & b,
     410    std::string && kernelName,
     411    Bindings && stream_inputs,
     412    Bindings && stream_outputs,
     413    Bindings && scalar_parameters,
     414    Bindings && scalar_outputs,
     415    Bindings && internal_scalars)
     416: MultiBlockKernel(b,
     417    TypeId::BlockOriented,
     418    std::move(kernelName),
     419    std::move(stream_inputs),
     420    std::move(stream_outputs),
     421    std::move(scalar_parameters),
     422    std::move(scalar_outputs),
     423    std::move(internal_scalars))
    421424, mDoBlockMethod(nullptr)
    422425, mStrideLoopBody(nullptr)
  • icGREP/icgrep-devel/icgrep/kernels/cc_scan_kernel.cpp

    r5440 r6261  
    2121    const unsigned fieldCount = iBuilder->getBitBlockWidth() / mScanwordBitWidth;
    2222    Type * T = iBuilder->getIntNTy(mScanwordBitWidth);
    23     VectorType * scanwordVectorType =  VectorType::get(T, fieldCount);   
     23    VectorType * scanwordVectorType =  VectorType::get(T, fieldCount);
    2424    Value * blockNo = iBuilder->getScalarField("BlockNo");
    2525    Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
    26    
     26
    2727    std::vector<Value * > matchWordVectors;
    2828    for(unsigned d = 0; d < mStreamNum; d++) {
     
    3030        matchWordVectors.push_back(iBuilder->CreateBitCast(matches, scanwordVectorType));
    3131    }
    32    
     32
    3333    for(unsigned i = 0; i < fieldCount; ++i) {
    3434        for(unsigned d = 0; d < mStreamNum; d++) {
     
    3737        }
    3838        scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
    39     }   
     39    }
    4040    iBuilder->setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getSize(1)));
    4141}
     
    9393}
    9494
    95 CCScanKernel::CCScanKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned streamNum) :
    96 BlockOrientedKernel("CCScan",
    97               {Binding{iBuilder->getStreamSetTy(streamNum), "matchResults"}},
    98               {}, {}, {}, {Binding{iBuilder->getSizeTy(), "BlockNo"}}),
     95CCScanKernel::CCScanKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned streamNum)
     96: BlockOrientedKernel(b, "CCScan",
     97              {Binding{b->getStreamSetTy(streamNum), "matchResults"}},
     98              {}, {}, {}, {Binding{b->getSizeTy(), "BlockNo"}}),
    9999mStreamNum(streamNum),
    100 mScanwordBitWidth(iBuilder->getSizeTy()->getBitWidth()) {
     100mScanwordBitWidth(b->getSizeTy()->getBitWidth()) {
    101101
    102102}
  • icGREP/icgrep-devel/icgrep/kernels/cc_scan_kernel.h

    r5440 r6261  
    1212
    1313namespace kernel {
    14    
     14
    1515class CCScanKernel : public BlockOrientedKernel {
    1616public:
    17     CCScanKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned streamNum);
    18        
     17    CCScanKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned streamNum);
     18
    1919private:
    2020    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
    2121    llvm::Function * generateScanWordRoutine(const std::unique_ptr<KernelBuilder> & iBuilder) const;
    22        
     22
    2323    unsigned mStreamNum;
    2424    unsigned mScanwordBitWidth;
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6258 r6261  
    7878}
    7979
    80 DeletionKernel::DeletionKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, const unsigned fieldWidth, const unsigned streamCount)
    81 : BlockOrientedKernel("del" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
    82                       {Binding{kb->getStreamSetTy(streamCount), "inputStreamSet"},
    83                           Binding{kb->getStreamSetTy(), "delMaskSet"}},
    84                       {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"},
    85                           Binding{kb->getStreamSetTy(), "unitCounts", FixedRate(), RoundUpTo(kb->getBitBlockWidth())}},
     80DeletionKernel::DeletionKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned fieldWidth, const unsigned streamCount)
     81: BlockOrientedKernel(b, "del" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
     82                      {Binding{b->getStreamSetTy(streamCount), "inputStreamSet"},
     83                          Binding{b->getStreamSetTy(), "delMaskSet"}},
     84                      {Binding{b->getStreamSetTy(streamCount), "outputStreamSet"},
     85                          Binding{b->getStreamSetTy(), "unitCounts", FixedRate(), RoundUpTo(b->getBitBlockWidth())}},
    8686                      {}, {}, {})
    8787, mDeletionFieldWidth(fieldWidth)
     
    116116                                         , StreamSet * inputStreamSet, StreamSet * extractionMask
    117117                                         , StreamSet * outputStreamSet)
    118 : MultiBlockKernel("fieldCompress" + std::to_string(fw) + "_" + std::to_string(inputStreamSet->getNumElements()),
     118: MultiBlockKernel(b, "fieldCompress" + std::to_string(fw) + "_" + std::to_string(inputStreamSet->getNumElements()),
    119119// inputs
    120120{Binding{"inputStreamSet", inputStreamSet},
     
    170170}
    171171
    172 PEXTFieldCompressKernel::PEXTFieldCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, const unsigned fieldWidth, const unsigned streamCount)
    173 : MultiBlockKernel("PEXTfieldCompress" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
    174                    {Binding{kb->getStreamSetTy(streamCount), "inputStreamSet"},
    175                        Binding{kb->getStreamSetTy(), "extractionMask"}},
    176                    {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"}},
     172PEXTFieldCompressKernel::PEXTFieldCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned fieldWidth, const unsigned streamCount)
     173: MultiBlockKernel(b, "PEXTfieldCompress" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
     174                   {Binding{b->getStreamSetTy(streamCount), "inputStreamSet"},
     175                       Binding{b->getStreamSetTy(), "extractionMask"}},
     176                   {Binding{b->getStreamSetTy(streamCount), "outputStreamSet"}},
    177177                   {}, {}, {})
    178178, mPEXTWidth(fieldWidth)
     
    186186                                           , StreamSet * compressedOutput
    187187                                           , const unsigned FieldWidth)
    188 : MultiBlockKernel("streamCompress" + std::to_string(FieldWidth) + "_" + std::to_string(source->getNumElements()),
     188: MultiBlockKernel(b, "streamCompress" + std::to_string(FieldWidth) + "_" + std::to_string(source->getNumElements()),
    189189{Binding{"sourceStreamSet", source},
    190190Binding{"extractionMask", extractionMask}},
     
    395395                                                       const unsigned PEXTWidth)
    396396
    397 : MultiBlockKernel("PEXTdel" + std::to_string(PEXTWidth) + "_" + std::to_string(inputStreamSet->getNumElements()),
     397: MultiBlockKernel(b, "PEXTdel" + std::to_string(PEXTWidth) + "_" + std::to_string(inputStreamSet->getNumElements()),
    398398{Binding{"selectors", selectors}, Binding{"inputStreamSet", inputStreamSet}},
    399399makeSwizzledDeleteByPEXTOutputBindings(outputStreamSets, PEXTWidth),
     
    666666
    667667DeleteByPEXTkernel::DeleteByPEXTkernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned streamCount, unsigned PEXT_width)
    668 : BlockOrientedKernel("PEXTdel" + std::to_string(fw) + "_" + std::to_string(streamCount) + "_" + std::to_string(PEXT_width),
     668: BlockOrientedKernel(b, "PEXTdel" + std::to_string(fw) + "_" + std::to_string(streamCount) + "_" + std::to_string(PEXT_width),
    669669              {Binding{b->getStreamSetTy(streamCount), "inputStreamSet"},
    670670                  Binding{b->getStreamSetTy(), "delMaskSet"}},
     
    695695//
    696696
    697 SwizzledBitstreamCompressByCount::SwizzledBitstreamCompressByCount(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned bitStreamCount, unsigned fieldWidth)
    698 : BlockOrientedKernel("swizzled_compress" + std::to_string(fieldWidth) + "_" + std::to_string(bitStreamCount),
    699                      {Binding{kb->getStreamSetTy(), "countsPerStride"}}, {}, {}, {}, {})
     697SwizzledBitstreamCompressByCount::SwizzledBitstreamCompressByCount(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned bitStreamCount, unsigned fieldWidth)
     698: BlockOrientedKernel(b, "swizzled_compress" + std::to_string(fieldWidth) + "_" + std::to_string(bitStreamCount),
     699                     {Binding{b->getStreamSetTy(), "countsPerStride"}}, {}, {}, {}, {})
    700700, mBitStreamCount(bitStreamCount)
    701701, mFieldWidth(fieldWidth)
    702 , mSwizzleFactor(kb->getBitBlockWidth() / fieldWidth)
     702, mSwizzleFactor(b->getBitBlockWidth() / fieldWidth)
    703703, mSwizzleSetCount((mBitStreamCount + mSwizzleFactor - 1)/mSwizzleFactor) {
    704704    assert((fieldWidth > 0) && ((fieldWidth & (fieldWidth - 1)) == 0) && "fieldWidth must be a power of 2");
    705705    assert(mSwizzleFactor > 1 && "fieldWidth must be less than the block width");
    706     mInputStreamSets.push_back(Binding{kb->getStreamSetTy(mSwizzleFactor, 1), "inputSwizzle0"});
    707     mOutputStreamSets.push_back(Binding{kb->getStreamSetTy(mSwizzleFactor, 1), "outputSwizzle0", BoundedRate(0, 1)});
    708     addInternalScalar(kb->getBitBlockType(), "pendingSwizzleData0");
     706    mInputStreamSets.push_back(Binding{b->getStreamSetTy(mSwizzleFactor, 1), "inputSwizzle0"});
     707    mOutputStreamSets.push_back(Binding{b->getStreamSetTy(mSwizzleFactor, 1), "outputSwizzle0", BoundedRate(0, 1)});
     708    addInternalScalar(b->getBitBlockType(), "pendingSwizzleData0");
    709709    for (unsigned i = 1; i < mSwizzleSetCount; i++) {
    710         mInputStreamSets.push_back(Binding{kb->getStreamSetTy(mSwizzleFactor, 1), "inputSwizzle" + std::to_string(i)});
    711         mOutputStreamSets.push_back(Binding{kb->getStreamSetTy(mSwizzleFactor, 1), "outputSwizzle" + std::to_string(i), RateEqualTo("outputSwizzle0")});
    712         addInternalScalar(kb->getBitBlockType(), "pendingSwizzleData" + std::to_string(i));
    713     }
    714     addInternalScalar(kb->getSizeTy(), "pendingOffset");
     710        mInputStreamSets.push_back(Binding{b->getStreamSetTy(mSwizzleFactor, 1), "inputSwizzle" + std::to_string(i)});
     711        mOutputStreamSets.push_back(Binding{b->getStreamSetTy(mSwizzleFactor, 1), "outputSwizzle" + std::to_string(i), RateEqualTo("outputSwizzle0")});
     712        addInternalScalar(b->getBitBlockType(), "pendingSwizzleData" + std::to_string(i));
     713    }
     714    addInternalScalar(b->getSizeTy(), "pendingOffset");
    715715}
    716716
  • icGREP/icgrep-devel/icgrep/kernels/deletion.h

    r6199 r6261  
    2626class DeletionKernel final : public BlockOrientedKernel {
    2727public:
    28     DeletionKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned fw, unsigned streamCount);
     28    DeletionKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned streamCount);
    2929    bool isCachable() const override { return true; }
    3030    bool hasSignature() const override { return false; }
     
    3636    const unsigned mStreamCount;
    3737};
    38    
     38
    3939// Compress within fields of size fw.
    4040class FieldCompressKernel final : public MultiBlockKernel {
     
    108108class DeleteByPEXTkernel final : public BlockOrientedKernel {
    109109public:
    110     DeleteByPEXTkernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned fw, unsigned streamCount, unsigned PEXT_width = sizeof(size_t) * 8);
     110    DeleteByPEXTkernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned streamCount, unsigned PEXT_width = sizeof(size_t) * 8);
    111111    bool isCachable() const override { return true; }
    112112    bool hasSignature() const override { return false; }
     
    121121    const unsigned mPEXTWidth;
    122122};
    123    
     123
    124124class SwizzledBitstreamCompressByCount final : public BlockOrientedKernel {
    125125public:
    126     SwizzledBitstreamCompressByCount(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned fieldWidth = sizeof(size_t) * 8);
     126    SwizzledBitstreamCompressByCount(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned bitStreamCount, unsigned fieldWidth = sizeof(size_t) * 8);
    127127    bool isCachable() const override { return true; }
    128128    bool hasSignature() const override { return false; }
     
    137137};
    138138
    139    
     139
    140140}
    141    
     141
    142142#endif
    143143
  • icGREP/icgrep-devel/icgrep/kernels/directorysearch.cpp

    r6193 r6261  
    306306                                 StreamSet * const fileNameStream,
    307307                                 const unsigned filesPerSegment, const bool recursive = true, const bool includeHidden = false)
    308 : SegmentOrientedKernel("DirectorySearch" + (recursive ? "R" : "") + (includeHidden ? "H" :"")
     308: SegmentOrientedKernel(b, "DirectorySearch" + (recursive ? "R" : "") + (includeHidden ? "H" :"")
    309309// input streams
    310310,{}
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.cpp

    r5793 r6261  
    1818}
    1919
    20 EvenOddKernel::EvenOddKernel(const std::unique_ptr<kernel::KernelBuilder> & builder)
    21 : BlockOrientedKernel("EvenOdd", {Binding{builder->getStreamSetTy(8, 1), "BasisBits"}}, {Binding{builder->getStreamSetTy(2, 1), "even_odd"}}, {}, {}, {}) {
     20EvenOddKernel::EvenOddKernel(const std::unique_ptr<kernel::KernelBuilder> & b)
     21: BlockOrientedKernel(b, "EvenOdd", {Binding{b->getStreamSetTy(8, 1), "BasisBits"}}, {Binding{b->getStreamSetTy(2, 1), "even_odd"}}, {}, {}, {}) {
    2222
    2323}
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.h

    r5440 r6261  
    1414class EvenOddKernel final : public BlockOrientedKernel {
    1515public:
    16     EvenOddKernel(const std::unique_ptr<kernel::KernelBuilder> & builder);
     16    EvenOddKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
    1717private:
    1818    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
  • icGREP/icgrep-devel/icgrep/kernels/fake_stream_generating_kernel.cpp

    r6184 r6261  
    1212                                                       StreamSet * refStream,
    1313                                                       StreamSet * outputStream)
    14 : SegmentOrientedKernel("FakeStream",
     14: SegmentOrientedKernel(b, "FakeStream",
    1515// input stream sets
    1616{Binding{"inputStream", refStream}},
     
    2424                                                       StreamSet * refStream,
    2525                                                       const StreamSets & outputStreams)
    26 : SegmentOrientedKernel("FakeStream",
     26: SegmentOrientedKernel(b, "FakeStream",
    2727// input stream sets
    2828{Binding{"inputStream", refStream}},
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r6252 r6261  
    9191    PabloAST * const u8pfx4 = ccc->compileCC(makeByte(0xF0, 0xF4), it);
    9292    PabloAST * const u8suffix = ccc->compileCC("u8suffix", makeByte(0x80, 0xBF), it);
    93    
     93
    9494    //
    9595    // Two-byte sequences
     
    101101
    102102    //
    103     // Three-byte sequences   
     103    // Three-byte sequences
    104104    Var * const EF_invalid = it.createVar("EF_invalid", ZEROES);
    105105    auto it3 = it.createScope();
     
    130130    PabloAST * const FX_invalid = it4.createOr(F0_invalid, F4_invalid);
    131131    it4.createAssign(EF_invalid, it4.createOr(EF_invalid, FX_invalid));
    132    
     132
    133133    //
    134134    // Invalid cases
     
    146146    //pb.createAssign(nonFinal, pb.createOr(nonFinal, CRLF));
    147147    //PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
    148    
     148
    149149    Var * const required = getOutputStreamVar("nonFinal");
    150150    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), nonFinal);
     
    535535}
    536536
    537 InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder, StreamSet * OriginalMatches, StreamSet * LineBreakStream, StreamSet * Matches)
    538 : BlockOrientedKernel("Invert",
     537InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * OriginalMatches, StreamSet * LineBreakStream, StreamSet * Matches)
     538: BlockOrientedKernel(b, "Invert",
    539539// Inputs
    540540{Binding{"matchedLines", OriginalMatches},
     
    663663}
    664664
    665 AbortOnNull::AbortOnNull(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * const InputStream, StreamSet * const OutputStream, Scalar * callbackObject)
    666 : MultiBlockKernel("AbortOnNull",
     665AbortOnNull::AbortOnNull(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * const InputStream, StreamSet * const OutputStream, Scalar * callbackObject)
     666: MultiBlockKernel(b, "AbortOnNull",
    667667// inputs
    668668{Binding{"byteData", InputStream, FixedRate(), Principal()}},
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r6250 r6261  
    1414namespace kernel {
    1515
    16    
     16
    1717class UTF8_nonFinal : public pablo::PabloKernel {
    1818public:
     
    101101    std::unique_ptr<GrepKernelOptions> mOptions;
    102102};
    103    
     103
    104104struct ByteBitGrepSignature {
    105105    ByteBitGrepSignature(re::RE * prefix, re::RE * suffix);
     
    110110};
    111111
    112    
     112
    113113class ByteBitGrepKernel : public ByteBitGrepSignature, public pablo::PabloKernel {
    114114    using Externals = std::vector<std::pair<std::string, StreamSet *>>;
     
    131131    bool hasSignature() const override { return false; }
    132132protected:
    133     void generatePabloMethod() override;   
     133    void generatePabloMethod() override;
    134134};
    135135
    136136class InvertMatchesKernel : public BlockOrientedKernel {
    137137public:
    138     InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder, StreamSet * OriginalMatches, StreamSet * LineBreakStream, StreamSet * Matches);
     138    InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * OriginalMatches, StreamSet * LineBreakStream, StreamSet * Matches);
    139139private:
    140140    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     
    147147    bool hasSignature() const override { return false; }
    148148protected:
    149     void generatePabloMethod() override;   
     149    void generatePabloMethod() override;
    150150};
    151151
     
    155155private:
    156156    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
    157    
     157
    158158};
    159159
  • icGREP/icgrep-devel/icgrep/kernels/hex_convert.cpp

    r6184 r6261  
    1212using namespace llvm;
    1313
    14 HexToBinary::HexToBinary(const std::unique_ptr<kernel::KernelBuilder> & /* b */, StreamSet * hexStream, StreamSet * binStream)
    15 : BlockOrientedKernel("HexToBinary",
     14HexToBinary::HexToBinary(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * hexStream, StreamSet * binStream)
     15: BlockOrientedKernel(b, "HexToBinary",
    1616                   {Binding{"hexdata", hexStream, FixedRate()}},
    1717                   {Binding{"binary_data", binStream, FixedRate(4)}},
     
    4343        //b->CallPrintInt("binary_pack ptr", b->CreateGEP(outputStreamBasePtr, b->CreateUDiv(packNumPhi, TWO)));
    4444        Value * binary_pack = b->bitCast(b->hsimd_packl(8, base_val[0], base_val[1]));
    45         b->storeOutputStreamBlock("binary_data", ZERO, b->getSize(i), binary_pack); 
     45        b->storeOutputStreamBlock("binary_data", ZERO, b->getSize(i), binary_pack);
    4646    }
    4747}
    4848
    49 BinaryToHex::BinaryToHex(const std::unique_ptr<kernel::KernelBuilder> & /* b */, StreamSet * binStream, StreamSet * hexStream)
    50 : BlockOrientedKernel("BinaryToHex",
     49BinaryToHex::BinaryToHex(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * binStream, StreamSet * hexStream)
     50: BlockOrientedKernel(b, "BinaryToHex",
    5151                   {Binding{"binary_data", binStream, FixedRate(4)}},
    5252                   {Binding{"hexdata", hexStream, FixedRate(), RoundUpTo(1)}},
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r6253 r6261  
    8585void Kernel::addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b) {
    8686
    87     // Set the default kernel stride.
    88     if (mStride == 0) {
    89         mStride = b->getBitBlockWidth();
    90     }
    91 
    9287    // TODO: if a stream has an Expandable or ManagedBuffer attribute or is produced at an Unknown rate,
    9388    // the pipeline ought to pass the stream as a DynamicBuffer. This will require some coordination between
     
    269264    std::vector<Type *> params;
    270265    params.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
    271     params.push_back(mKernelStateType->getPointerTo());  // self
     266    params.push_back(mKernelStateType->getPointerTo());  // handle
    272267    params.push_back(sizeTy); // numOfStrides
    273268    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
     
    379374
    380375    const auto numOfInputs = getNumOfStreamInputs();
    381     reset(mProcessedInputItems, numOfInputs);
     376    reset(mProcessedInputItemPtr, numOfInputs);
    382377    reset(mAccessibleInputItems, numOfInputs);
    383378    reset(mAvailableInputItems, numOfInputs);
     
    421416            std::tie(port, index) = getStreamPort(rate.getReference());
    422417            assert (port == Port::Input && index < i);
    423             assert (mProcessedInputItems[index]);
    424             Value * const ref = b->CreateLoad(mProcessedInputItems[index]);
     418            assert (mProcessedInputItemPtr[index]);
     419            Value * const ref = b->CreateLoad(mProcessedInputItemPtr[index]);
    425420            processed = b->CreateMul2(ref, rate.getRate());
    426421        }
    427422        AllocaInst * const processedItems = b->CreateAlloca(sizeTy);
    428423        b->CreateStore(processed, processedItems);
    429         mProcessedInputItems[i] = processedItems;
     424        mProcessedInputItemPtr[i] = processedItems;
    430425        /// ----------------------------------------------------
    431426        /// accessible item count
     
    454449    // set all of the output buffers
    455450    const auto numOfOutputs = getNumOfStreamOutputs();
    456     reset(mProducedOutputItems, numOfOutputs);
     451    reset(mProducedOutputItemPtr, numOfOutputs);
    457452    reset(mWritableOutputItems, numOfOutputs);
    458453    reset(mConsumedOutputItems, numOfOutputs);
     
    502497            std::tie(port, index) = getStreamPort(rate.getReference());
    503498            assert (port == Port::Input || (port == Port::Output && index < i));
    504             const auto & items = (port == Port::Input) ? mProcessedInputItems : mProducedOutputItems;
     499            const auto & items = (port == Port::Input) ? mProcessedInputItemPtr : mProducedOutputItemPtr;
    505500            Value * const ref = b->CreateLoad(items[index]);
    506501            produced = b->CreateMul2(ref, rate.getRate());
     
    508503        AllocaInst * const producedItems = b->CreateAlloca(sizeTy);
    509504        b->CreateStore(produced, producedItems);
    510         mProducedOutputItems[i] = producedItems;
     505        mProducedOutputItemPtr[i] = producedItems;
    511506        /// ----------------------------------------------------
    512507        /// consumed or writable item count
     
    538533    for (unsigned i = 0; i < numOfInputs; i++) {
    539534        if (updatableProcessedInputItems[i]) {
    540             Value * const items = b->CreateLoad(mProcessedInputItems[i]);
     535            Value * const items = b->CreateLoad(mProcessedInputItemPtr[i]);
    541536            b->CreateStore(items, updatableProcessedInputItems[i]);
    542537        }
     
    545540    for (unsigned i = 0; i < numOfOutputs; i++) {
    546541        if (updatableProducedOutputItems[i]) {
    547             Value * const items = b->CreateLoad(mProducedOutputItems[i]);
     542            Value * const items = b->CreateLoad(mProducedOutputItemPtr[i]);
    548543            b->CreateStore(items, updatableProducedOutputItems[i]);
    549544        }
     
    721716        report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
    722717    }
     718    if (LLVM_UNLIKELY(mStride == 0)) {
     719        report_fatal_error(getName() + ": stride cannot be 0");
     720    }
    723721    addBaseKernelProperties(b);
    724722    addInternalKernelProperties(b);
     
    728726    }
    729727    mKernelStateType = mModule->getTypeByName(getName());
    730 
    731 
    732728    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
    733729        std::vector<llvm::Type *> fields;
     
    747743        mKernelStateType = StructType::create(b->getContext(), fields, getName());
    748744    }
    749 
    750 
    751 
    752 
    753745    assert (isa<StructType>(mKernelStateType));
    754746}
     
    838830
    839831/** ------------------------------------------------------------------------------------------------------------- *
     832 * @brief addAttributesFrom
     833 *
     834 * Add any attributes from a set of kernels
     835 ** ------------------------------------------------------------------------------------------------------------- */
     836void Kernel::addAttributesFrom(const std::vector<Kernel *> & kernels) {
     837    unsigned mustTerminate = 0;
     838    bool canTerminate = false;
     839    bool sideEffecting = false;
     840    for (const Kernel * kernel : kernels) {
     841        if (kernel->hasAttribute(AttrId::MustExplicitlyTerminate)) {
     842            mustTerminate++;
     843        } else if (kernel->hasAttribute(AttrId::CanTerminateEarly)) {
     844            canTerminate = true;
     845        }
     846        if (kernel->hasAttribute(AttrId::SideEffecting)) {
     847            sideEffecting = true;
     848        }
     849    }
     850    if (LLVM_UNLIKELY(mustTerminate == kernels.size())) {
     851        addAttribute(MustExplicitlyTerminate());
     852    } else if (canTerminate || mustTerminate) {
     853        addAttribute(CanTerminateEarly());
     854    }
     855    if (sideEffecting) {
     856        addAttribute(SideEffecting());
     857    }
     858}
     859
     860/** ------------------------------------------------------------------------------------------------------------- *
    840861 * @brief createInstance
    841862 ** ------------------------------------------------------------------------------------------------------------- */
     
    11641185
    11651186// CONSTRUCTOR
    1166 Kernel::Kernel(const TypeId typeId,
     1187Kernel::Kernel(const std::unique_ptr<KernelBuilder> & b,
     1188               const TypeId typeId,
    11671189               std::string && kernelName,
    11681190               Bindings && stream_inputs,
     
    11811203, mInternalScalars( std::move(internal_scalars))
    11821204, mCurrentMethod(nullptr)
    1183 , mStride(0)
     1205, mStride(b->getBitBlockWidth())
    11841206, mTerminationSignalPtr(nullptr)
    11851207, mIsFinal(nullptr)
     
    11931215
    11941216// CONSTRUCTOR
    1195 SegmentOrientedKernel::SegmentOrientedKernel(std::string && kernelName,
     1217SegmentOrientedKernel::SegmentOrientedKernel(const std::unique_ptr<KernelBuilder> & b,
     1218                                             std::string && kernelName,
    11961219                                             Bindings && stream_inputs,
    11971220                                             Bindings && stream_outputs,
     
    11991222                                             Bindings && scalar_outputs,
    12001223                                             Bindings && internal_scalars)
    1201 : Kernel(TypeId::SegmentOriented, std::move(kernelName),
    1202          std::move(stream_inputs), std::move(stream_outputs),
    1203          std::move(scalar_parameters), std::move(scalar_outputs),
    1204          std::move(internal_scalars))  {
    1205 
    1206 }
    1207 
    1208 
    1209 }
     1224: Kernel(b,
     1225TypeId::SegmentOriented, std::move(kernelName),
     1226std::move(stream_inputs), std::move(stream_outputs),
     1227std::move(scalar_parameters), std::move(scalar_outputs),
     1228std::move(internal_scalars)) {
     1229
     1230}
     1231
     1232
     1233}
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r6253 r6261  
    458458        std::tie(port, index) = getStreamPort(name);
    459459        assert (port == Port::Input);
    460         return mProcessedInputItems[index];
     460        return mProcessedInputItemPtr[index];
    461461    }
    462462
     
    465465        std::tie(port, index) = getStreamPort(name);
    466466        assert (port == Port::Output);
    467         return mProducedOutputItems[index];
     467        return mProducedOutputItemPtr[index];
    468468    }
    469469
     
    480480
    481481    // Constructor
    482     Kernel(const TypeId typeId, std::string && kernelName,
     482    Kernel(const std::unique_ptr<KernelBuilder> & b,
     483           const TypeId typeId, std::string && kernelName,
    483484           Bindings && stream_inputs, Bindings && stream_outputs,
    484485           Bindings && scalar_inputs, Bindings && scalar_outputs,
     
    486487
    487488private:
     489
     490    void addAttributesFrom(const std::vector<Kernel *> & kernels);
    488491
    489492    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b);
     
    528531    llvm::Value *                   mNumOfStrides;
    529532
    530     std::vector<llvm::Value *>      mProcessedInputItems;
     533    std::vector<llvm::Value *>      mProcessedInputItemPtr;
    531534    std::vector<llvm::Value *>      mAccessibleInputItems;
    532535    std::vector<llvm::Value *>      mAvailableInputItems;
    533536    std::vector<llvm::Value *>      mPopCountRateArray;
    534537    std::vector<llvm::Value *>      mNegatedPopCountRateArray;
    535     std::vector<llvm::Value *>      mProducedOutputItems;
     538    std::vector<llvm::Value *>      mProducedOutputItemPtr;
    536539    std::vector<llvm::Value *>      mWritableOutputItems;
    537540    std::vector<llvm::Value *>      mConsumedOutputItems;
     
    559562protected:
    560563
    561     SegmentOrientedKernel(std::string && kernelName,
     564    SegmentOrientedKernel(const std::unique_ptr<KernelBuilder> & b,
     565                          std::string && kernelName,
    562566                          Bindings && stream_inputs,
    563567                          Bindings && stream_outputs,
     
    587591protected:
    588592
    589     MultiBlockKernel(std::string && kernelName,
    590                      Bindings && stream_inputs,
    591                      Bindings && stream_outputs,
    592                      Bindings && scalar_parameters,
    593                      Bindings && scalar_outputs,
    594                      Bindings && internal_scalars);
    595 
    596     virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) = 0;
    597 
    598 private:
    599 
    600     MultiBlockKernel(const TypeId kernelTypId,
     593    MultiBlockKernel(const std::unique_ptr<KernelBuilder> & b,
    601594                     std::string && kernelName,
    602595                     Bindings && stream_inputs,
     
    606599                     Bindings && internal_scalars);
    607600
     601    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) = 0;
     602
     603private:
     604
     605    MultiBlockKernel(const std::unique_ptr<KernelBuilder> & b,
     606                     const TypeId kernelTypId,
     607                     std::string && kernelName,
     608                     Bindings && stream_inputs,
     609                     Bindings && stream_outputs,
     610                     Bindings && scalar_parameters,
     611                     Bindings && scalar_outputs,
     612                     Bindings && internal_scalars);
     613
    608614private:
    609615
     
    639645    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
    640646
    641     BlockOrientedKernel(std::string && kernelName,
     647    BlockOrientedKernel(const std::unique_ptr<KernelBuilder> & b,
     648                        std::string && kernelName,
    642649                        Bindings && stream_inputs,
    643650                        Bindings && stream_outputs,
  • icGREP/icgrep-devel/icgrep/kernels/lz4/decompression/lz4_sequential_decompression_base.cpp

    r6184 r6261  
    2525// constants
    2626unsigned blockSize, bool conditionalDecompression)
    27 : SegmentOrientedKernel(std::move(kernelName),
     27: SegmentOrientedKernel(b, std::move(kernelName),
    2828// Inputs
    2929{Binding{"byteStream", byteStream, BoundedRate(0, 1)}, // , AlwaysConsume()
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.cpp

    r6184 r6261  
    2222                                             // outputs
    2323                                             StreamSet * isCompressed, StreamSet * blockStart, StreamSet * blockEnd)
    24 : SegmentOrientedKernel("LZ4BlockdDecoder",
     24: SegmentOrientedKernel(b, "LZ4BlockdDecoder",
    2525// Inputs
    2626{
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_bytestream_decoder.cpp

    r6193 r6261  
    179179                                                       // output
    180180                                                       StreamSet * outputStream)
    181 : MultiBlockKernel("lz4ByteStreamDecoder",
     181: MultiBlockKernel(b, "lz4ByteStreamDecoder",
    182182// Inputs
    183183{Binding{"literalIndexes", literalIndexes},
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_decoder.cpp

    r6184 r6261  
    77#include "lz4_index_decoder.h"
    88#include <kernels/kernel_builder.h>
    9  
     9
    1010using namespace llvm;
    1111using namespace kernel;
     
    203203    BasicBlock * atBlockChecksum = b->CreateBasicBlock("at_block_checksum");
    204204    generateAtBlockChecksum(b, atBlockChecksum, skippingBytes);
    205  
     205
    206206    // %at_block_size
    207207    BasicBlock * atBlockSize = b->CreateBasicBlock("at_block_size");
     
    588588    // number of extender = matchExtEnd - wordOffset
    589589    Value * numExtenders = b->CreateSub(matchExtEnd, wordOffset);
    590     Value * matchExtReachBoundary = 
     590    Value * matchExtReachBoundary =
    591591            b->CreateICmpEQ(matchExtEnd, b->getInt32(wordWidth));
    592592    // There are matchExtEnd forward zeroes, we load bytes[matchExtEnd]
     
    680680                                             StreamSet * literalIndexes,
    681681                                             StreamSet * matchIndexes)
    682 : BlockOrientedKernel("lz4IndexDecoder",
     682: BlockOrientedKernel(b, "lz4IndexDecoder",
    683683// Inputs
    684684{Binding{"byteStream", byteStream, FixedRate(), Misaligned()},
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_match_detector.cpp

    r6184 r6261  
    1212
    1313namespace kernel {
    14     LZ4MatchDetectorKernel::LZ4MatchDetectorKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned blockSize)
    15             : SegmentOrientedKernel("LZ4MatchDetectorKernel",
     14    LZ4MatchDetectorKernel::LZ4MatchDetectorKernel(const std::unique_ptr<kernel::KernelBuilder> &b, unsigned blockSize)
     15            : SegmentOrientedKernel(b, "LZ4MatchDetectorKernel",
    1616// Inputs
    1717                                    {
    18                                             Binding{iBuilder->getStreamSetTy(1), "matches", BoundedRate(0, 1)},
    19                                             Binding{iBuilder->getStreamSetTy(1), "linebreak", RateEqualTo("matches")}
     18                                            Binding{b->getStreamSetTy(1), "matches", BoundedRate(0, 1)},
     19                                            Binding{b->getStreamSetTy(1), "linebreak", RateEqualTo("matches")}
    2020                                    },
    2121//Outputs
    2222                                    {
    2323
    24                                             Binding{iBuilder->getStreamSetTy(1, 8), "hasMatches", BoundedRate(0, 1)}
     24                                            Binding{b->getStreamSetTy(1, 8), "hasMatches", BoundedRate(0, 1)}
    2525                                    },
    2626//Arguments
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_match_detector.h

    r6150 r6261  
    2020    class LZ4MatchDetectorKernel : public SegmentOrientedKernel {
    2121    public:
    22         LZ4MatchDetectorKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned blockSize = 4 * 1024 * 1024);
     22        LZ4MatchDetectorKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned blockSize = 4 * 1024 * 1024);
    2323    protected:
    2424        void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) override;
  • icGREP/icgrep-devel/icgrep/kernels/lz4/twist_kernel.cpp

    r6184 r6261  
    4242
    4343TwistByPDEPKernel::TwistByPDEPKernel(const std::unique_ptr <kernel::KernelBuilder> &b, unsigned numberOfInputStream, unsigned twistWidth)
    44 : BlockOrientedKernel("TwistByPDEPKernel",
     44: BlockOrientedKernel(b, "TwistByPDEPKernel",
    4545{Binding{b->getStreamSetTy(numberOfInputStream, 1), "basisBits"}},
    4646{Binding{b->getStreamSetTy(1, twistWidth), "byteStream"}},
     
    7373                                                     const StreamSets & inputStreams,
    7474                                                     StreamSet * outputStream)
    75 : BlockOrientedKernel("TwistMultipleByPDEPKernel",
     75: BlockOrientedKernel(b, "TwistMultipleByPDEPKernel",
    7676{},
    7777{Binding{"byteStream", outputStream}},
  • icGREP/icgrep-devel/icgrep/kernels/lz4/untwist_kernel.cpp

    r6184 r6261  
    4848
    4949
    50 UntwistByPEXTKernel::UntwistByPEXTKernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * inputStream, StreamSet * outputStream)
    51 : BlockOrientedKernel("UntwistByPEXTKernel",
     50UntwistByPEXTKernel::UntwistByPEXTKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * inputStream, StreamSet * outputStream)
     51: BlockOrientedKernel(b, "UntwistByPEXTKernel",
    5252// input
    5353{Binding{"byteStream", inputStream}},
     
    7575
    7676
    77 UntwistMultipleByPEXTKernel::UntwistMultipleByPEXTKernel(const std::unique_ptr<kernel::KernelBuilder> &b,
     77UntwistMultipleByPEXTKernel::UntwistMultipleByPEXTKernel(const std::unique_ptr<kernel::KernelBuilder> & b,
    7878                                                         StreamSet * inputStream,
    7979                                                         const StreamSets & outputStreams)
    80 : BlockOrientedKernel("UntwistMultipleByPEXTKernel",
     80: BlockOrientedKernel(b, "UntwistMultipleByPEXTKernel",
    8181// input
    8282{Binding{"byteStream", inputStream}},
  • icGREP/icgrep-devel/icgrep/kernels/lz4/untwist_kernel.h

    r6184 r6261  
    1313    class UntwistByPEXTKernel final : public BlockOrientedKernel{
    1414    public:
    15         UntwistByPEXTKernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * inputStream, StreamSet * outputStream);
     15        UntwistByPEXTKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * inputStream, StreamSet * outputStream);
    1616    protected:
    1717        const size_t mNumberOfOutputStream;
  • icGREP/icgrep-devel/icgrep/kernels/multiblock_kernel.cpp

    r6253 r6261  
    4242// MULTI-BLOCK KERNEL CONSTRUCTOR
    4343MultiBlockKernel::MultiBlockKernel(
     44    const std::unique_ptr<KernelBuilder> &b,
    4445    std::string && kernelName,
    4546    Bindings && stream_inputs,
     
    4849    Bindings && scalar_outputs,
    4950    Bindings && internal_scalars)
    50 : MultiBlockKernel(TypeId::MultiBlock,
     51: MultiBlockKernel(b,
     52    TypeId::MultiBlock,
    5153    std::move(kernelName),
    5254    std::move(stream_inputs),
     
    5860}
    5961
    60 MultiBlockKernel::MultiBlockKernel(
     62MultiBlockKernel::MultiBlockKernel(const std::unique_ptr<KernelBuilder> &b,
    6163    const TypeId typeId,
    6264    std::string && kernelName,
     
    6668    Bindings && scalar_outputs,
    6769    Bindings && internal_scalars)
    68 : Kernel(typeId,
     70: Kernel(b, typeId,
    6971     std::move(kernelName),
    7072     std::move(stream_inputs),
  • icGREP/icgrep-devel/icgrep/kernels/optimizationbranch.cpp

    r6253 r6261  
    11#include "optimizationbranch.h"
     2#include <kernels/kernel_builder.h>
    23
    34#warning at compilation, this must verify that the I/O rates of the branch permits the rates of the branches
     
    78namespace kernel {
    89
     10using AttrId = Attribute::KindId;
     11
     12const std::string OptimizationBranch::CONDITION_TAG = "@condition";
     13
     14/** ------------------------------------------------------------------------------------------------------------- *
     15 * @brief linkExternalMethods
     16 ** ------------------------------------------------------------------------------------------------------------- */
    917void OptimizationBranch::linkExternalMethods(const std::unique_ptr<KernelBuilder> & b) {
    1018    mTrueKernel->linkExternalMethods(b);
     
    1220}
    1321
     22/** ------------------------------------------------------------------------------------------------------------- *
     23 * @brief generateInitializeMethod
     24 ** ------------------------------------------------------------------------------------------------------------- */
    1425void OptimizationBranch::generateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
    1526    mTrueKernel->generateInitializeMethod(b);
     
    1728}
    1829
     30/** ------------------------------------------------------------------------------------------------------------- *
     31 * @brief initializeInstance
     32 ** ------------------------------------------------------------------------------------------------------------- */
    1933void OptimizationBranch::initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<llvm::Value *> & args) {
    20 
    21 }
    22 
     34    mTrueKernel->initializeInstance(b, args);
     35    mFalseKernel->initializeInstance(b, args);
     36}
     37
     38/** ------------------------------------------------------------------------------------------------------------- *
     39 * @brief isParamAddressable
     40 ** ------------------------------------------------------------------------------------------------------------- */
     41inline bool isParamAddressable(const Binding & binding) {
     42    if (binding.isDeferred()) {
     43        return true;
     44    }
     45    const ProcessingRate & rate = binding.getRate();
     46    return (rate.isBounded() || rate.isUnknown());
     47}
     48
     49/** ------------------------------------------------------------------------------------------------------------- *
     50 * @brief isParamConstant
     51 ** ------------------------------------------------------------------------------------------------------------- */
     52inline bool isParamConstant(const Binding & binding) {
     53    assert (!binding.isDeferred());
     54    const ProcessingRate & rate = binding.getRate();
     55    return rate.isFixed() || rate.isPopCount() || rate.isNegatedPopCount();
     56}
     57
     58/** ------------------------------------------------------------------------------------------------------------- *
     59 * @brief hasParam
     60 ** ------------------------------------------------------------------------------------------------------------- */
     61inline bool hasParam(const Binding & binding) {
     62    return !binding.getRate().isRelative();
     63}
     64
     65/** ------------------------------------------------------------------------------------------------------------- *
     66 * @brief callKernel
     67 ** ------------------------------------------------------------------------------------------------------------- */
     68void OptimizationBranch::callKernel(const std::unique_ptr<KernelBuilder> & b,
     69                                    const Kernel * const kernel, std::vector<Value *> & args,
     70                                    PHINode * const terminatedPhi) {
     71    args[0] = kernel->getHandle();
     72    Value * terminated = b->CreateCall(kernel->getDoSegmentFunction(b->getModule()), args);
     73    if (terminatedPhi) {
     74        if (LLVM_UNLIKELY(kernel->canSetTerminateSignal())) {
     75            terminated = b->getFalse();
     76        }
     77        terminatedPhi->addIncoming(terminated, b->GetInsertBlock());
     78    }
     79}
     80
     81/** ------------------------------------------------------------------------------------------------------------- *
     82 * @brief generateKernelMethod
     83 ** ------------------------------------------------------------------------------------------------------------- */
    2384void OptimizationBranch::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
    24 
    25 }
    26 
     85#if 0
     86
     87    BasicBlock * const loopCond = b->CreateBasicBlock("cond");
     88    BasicBlock * const nonZeroPath = b->CreateBasicBlock("nonZeroPath");
     89    BasicBlock * const allZeroPath = b->CreateBasicBlock("allZeroPath");
     90    BasicBlock * const mergePaths = b->CreateBasicBlock("mergePaths");
     91    BasicBlock * const exit = b->CreateBasicBlock("exit");
     92
     93    Constant * const ZERO = b->getSize(0);
     94    Constant * const ONE = b->getSize(1);
     95
     96    const auto numOfInputs = getNumOfStreamInputs();
     97    std::vector<Value *> initialInputItems(numOfInputs, nullptr);
     98    for (unsigned i = 0; i < numOfInputs; ++i) {
     99        if (isParamConstant(mInputStreamSets[i])) {
     100            initialInputItems[i] = b->CreateLoad(mProcessedInputItemPtr[i]);
     101        }
     102    }
     103
     104    const auto numOfOutputs = getNumOfStreamOutputs();
     105    std::vector<Value *> initialOutputItems(numOfOutputs, nullptr);
     106    for (unsigned i = 0; i < numOfOutputs; ++i) {
     107        if (isParamConstant(mOutputStreamSets[i])) {
     108            initialOutputItems[i] = b->CreateLoad(mProducedOutputItemPtr[i]);
     109        }
     110    }
     111
     112    BasicBlock * const entry = b->GetInsertBlock();
     113    b->CreateBr(loopCond);
     114
     115    std::vector<Value *> args;
     116    PHINode * terminatedPhi = nullptr;
     117    if (canSetTerminateSignal()) {
     118        b->SetInsertPoint(mergePaths);
     119        terminatedPhi = b->CreatePHINode(b->getInt1Ty(), 2);
     120    }
     121
     122    b->SetInsertPoint(loopCond);
     123
     124    if (LLVM_LIKELY(isa<StreamSet>(mCondition))) {
     125
     126        Type * const BitBlockTy = b->getBitBlockType();
     127        IntegerType * const sizeTy = b->getSizeTy();
     128
     129        PHINode * const index = b->CreatePHI(sizeTy, 3);
     130        index->addIncoming(ZERO, entry);
     131        PHINode * const first = b->CreatePHI(sizeTy, 3);
     132        first->addIncoming(ZERO, entry);
     133        PHINode * const state = b->CreatePHI(b->getInt1Ty(), 3);
     134        state->addIncoming(b->getFalse(), entry);
     135
     136        const auto numOfInputs = getNumOfStreamInputs() - 1; // the final input is our condition stream
     137        std::vector<PHINode *> inputPhis(numOfInputs);
     138        for (unsigned i = 0; i < numOfInputs; ++i) {
     139            PHINode * const inputPhi = b->CreatePHI(sizeTy, 3);
     140            inputPhi->addIncoming(getAccessibleInputItems(i), entry);
     141            inputPhis[i] = inputPhi;
     142        }
     143
     144        const auto numOfOutputs = getNumOfStreamOutputs();
     145        std::vector<PHINode *> outputPhis(numOfOutputs);
     146        for (unsigned i = 0; i < numOfOutputs; ++i) {
     147            PHINode * const outputPhi = b->CreatePHI(sizeTy, 3);
     148            outputPhi->addIncoming(getWritableInputItems(i), entry);
     149            outputPhis[i] = outputPhi;
     150        }
     151
     152
     153        BasicBlock * const summarizeOneStride = b->CreateBasicBlock("summarizeOneStride", nonZeroPath);
     154        BasicBlock * const checkStride = b->CreateBasicBlock("checkStride", nonZeroPath);
     155        BasicBlock * const processStrides = b->CreateBasicBlock("processStrides", nonZeroPath);
     156
     157        Constant * const strideCount = b->getSize(getStride() / b->getBitBlockWidth());
     158
     159        Value * const streamCount = b->getInputStreamSetCount(CONDITION_TAG);
     160        Value * const blocksPerStride = b->CreateMul(streamCount, strideCount);
     161
     162
     163        Value * const offset = b->CreateMul(index, strideCount);
     164        Value * basePtr = b->getInputStreamBlockPtr(CONDITION_TAG, ZERO, offset);
     165        basePtr = b->CreatePointerCast(basePtr, BitBlockTy->getPointerTo());
     166        b->CreateBr(summarizeOneStride);
     167
     168        // OR together every condition block in this stride
     169        b->SetInsertPoint(summarizeOneStride);
     170        PHINode * const iteration = b->CreatePHI(b->getSizeTy(), 2);
     171        iteration->addIncoming(ZERO, loopCond);
     172        PHINode * const merged = b->CreatePHI(BitBlockTy, 2);
     173        merged->addIncoming(Constant::getNullValue(BitBlockTy), loopCond);
     174        Value * value = b->CreateBlockAlignedLoad(basePtr, iteration);
     175        value = b->CreateOr(value, merged);
     176        merged->addIncoming(value, summarizeOneStride);
     177        Value * const nextIteration = b->CreateAdd(iteration, ONE);
     178        Value * const more = b->CreateICmpNE(nextIteration, blocksPerStride);
     179        b->CreateCondBr(more, summarizeOneStride, checkStride);
     180
     181        // Check the merged value of our condition block(s); if it differs from
     182        // the prior value or this is our last stride, then process the strides.
     183        // Note, however, initially state is "indeterminate" so we silently
     184        // ignore the first stride unless it is also our last.
     185        b->SetInsertPoint(checkStride);
     186        Value * const nextState = b->bitblock_any(merged);
     187        Value * const sameState = b->CreateICmpEQ(nextState, state);
     188        Value * const firstStride = b->CreateICmpEQ(index, ZERO);
     189        Value * const continuation = b->CreateOr(sameState, firstStride);
     190        Value * const nextIndex = b->CreateAdd(index, ONE);
     191        Value * const notLastStride = b->CreateICmpNE(nextIndex, mNumOfStrides);
     192        Value * const checkNextStride = b->CreateAnd(continuation, notLastStride);
     193        index->addIncoming(nextIndex, checkStride);
     194        first->addIncoming(first, checkStride);
     195        state->addIncoming(nextState, checkStride);
     196        b->CreateLikelyCondBr(checkNextStride, loopCond, processStrides);
     197
     198        // Process every stride between [first, index)
     199        b->SetInsertPoint(processStrides);
     200
     201        // build our kernel call
     202        args.reserve(mCurrentMethod->arg_size());
     203        args.push_back(nullptr); // handle
     204        args.push_back(b->CreateSub(index, first)); // numOfStrides
     205        for (unsigned i = 0; i < numOfInputs; i++) {
     206            const StreamSetBuffer * const buffer = mStreamSetInputBuffers[i];
     207            // logical base input address
     208            args.push_back(buffer->getBaseAddress(b.get()));
     209
     210            // processed input items
     211            const Binding & input = mInputStreamSets[i];
     212            if (isParamAddressable(input)) {
     213                args.push_back(mProcessedInputItemPtr[i]); // updatable
     214            }  else if (isParamConstant(input)) {
     215                args.push_back(b->CreateLoad(mProcessedInputItemPtr[i]));  // constant
     216            }
     217
     218            // accessible input items (after non-deferred processed item count)
     219            args.push_back(sizeTy);
     220
     221            if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     222                args.push_back(b->CreateGEP(mPopCountRateArray[i], first));
     223            }
     224            if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
     225                args.push_back(b->CreateGEP(mNegatedPopCountRateArray[i], first));
     226            }
     227        }
     228
     229        // state is implicitly "indeterminate" during our first stride
     230        Value * const currentState = b->CreateSelect(firstStride, nextState, state);
     231        b->CreateCondBr(currentState, nonZeroPath, allZeroPath);
     232
     233    } else {
     234
     235        Value * const cond = b->getScalarField(CONDITION_TAG);
     236        const auto n = mCurrentMethod->arg_size();
     237        args.resize(n);
     238        auto arg = mCurrentMethod->arg_begin();
     239        for (unsigned i = 1; i != n; ++i) {
     240            assert (arg != mCurrentMethod->arg_end());
     241            args[i] = *(++arg);
     242        }
     243        assert (args[0] == nullptr);
     244        b->CreateCondBr(b->CreateIsNotNull(cond), nonZeroPath, allZeroPath);
     245    }
     246
     247    // make the actual calls and take any potential termination signal
     248    b->SetInsertPoint(nonZeroPath);
     249    callKernel(b, mTrueKernel, args, terminatedPhi);
     250    b->CreateBr(mergePaths);
     251
     252    b->SetInsertPoint(allZeroPath);
     253    callKernel(b, mFalseKernel, args, terminatedPhi);
     254    b->CreateBr(mergePaths);
     255
     256
     257#endif
     258}
     259
     260/** ------------------------------------------------------------------------------------------------------------- *
     261 * @brief generateFinalizeMethod
     262 ** ------------------------------------------------------------------------------------------------------------- */
    27263void OptimizationBranch::generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
    28264    mTrueKernel->generateFinalizeMethod(b);
     
    30266}
    31267
     268/** ------------------------------------------------------------------------------------------------------------- *
     269 * @brief addAdditionalFunctions
     270 ** ------------------------------------------------------------------------------------------------------------- */
    32271void OptimizationBranch::addAdditionalFunctions(const std::unique_ptr<KernelBuilder> & b) {
    33272    mTrueKernel->addAdditionalFunctions(b);
     
    35274}
    36275
     276/** ------------------------------------------------------------------------------------------------------------- *
     277 * @brief finalizeInstance
     278 ** ------------------------------------------------------------------------------------------------------------- */
    37279Value * OptimizationBranch::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
    38280
     
    46288
    47289void OptimizationBranch::addInternalKernelProperties(const std::unique_ptr<kernel::KernelBuilder> & b) {
    48 
     290    mTrueKernel->addInternalKernelProperties(b);
     291    mFalseKernel->addInternalKernelProperties(b);
    49292}
    50293
     
    53296}
    54297
    55 OptimizationBranch::OptimizationBranch(
     298OptimizationBranch::OptimizationBranch(const std::unique_ptr<KernelBuilder> & b,
    56299    std::string && signature,
    57     not_null<StreamSet *> condition,
    58     not_null<Kernel *> trueKernel,
    59     not_null<Kernel *> falseKernel,
     300    not_null<Relationship *> condition,
     301    not_null<Kernel *> nonZeroKernel,
     302    not_null<Kernel *> allZeroKernel,
    60303    Bindings && stream_inputs,
    61304    Bindings && stream_outputs,
    62305    Bindings && scalar_inputs,
    63306    Bindings && scalar_outputs)
    64 : Kernel(TypeId::OptimizationBranch, std::move(signature),
     307: Kernel(b, TypeId::OptimizationBranch, std::move(signature),
    65308         std::move(stream_inputs), std::move(stream_outputs),
    66309         std::move(scalar_inputs), std::move(scalar_outputs), {})
    67310, mCondition(condition.get())
    68 , mTrueKernel(trueKernel.get())
    69 , mFalseKernel(falseKernel.get()) {
    70 
    71 
     311, mTrueKernel(nonZeroKernel.get())
     312, mFalseKernel(allZeroKernel.get()) {
     313    addAttributesFrom({mTrueKernel, mFalseKernel});
    72314}
    73315
  • icGREP/icgrep-devel/icgrep/kernels/optimizationbranch.h

    r6253 r6261  
    1212public:
    1313
     14    const static std::string CONDITION_TAG;
     15
    1416    ~OptimizationBranch();
    1517
    1618protected:
    1719
    18     OptimizationBranch(std::string && signature,
    19                        not_null<StreamSet *> condition,
    20                        not_null<Kernel *> trueKernel,
    21                        not_null<Kernel *> falseKernel,
     20    OptimizationBranch(const std::unique_ptr<KernelBuilder> & b,
     21                       std::string && signature,
     22                       not_null<Relationship *> condition,
     23                       not_null<Kernel *> nonZeroKernel,
     24                       not_null<Kernel *> allZeroKernel,
    2225                       Bindings && stream_inputs,
    2326                       Bindings && stream_outputs,
     
    4548private:
    4649
    47     StreamSet * const       mCondition;
     50    void callKernel(const std::unique_ptr<KernelBuilder> & b,
     51                    const Kernel * const kernel, std::vector<llvm::Value *> & args,
     52                    llvm::PHINode * const terminatedPhi);
     53
     54private:
     55
     56    Relationship * const    mCondition;
    4857    Kernel * const          mTrueKernel;
    4958    Kernel * const          mFalseKernel;
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r6199 r6261  
    1010
    1111namespace kernel{
    12        
     12
    1313void p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
    1414    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
     
    4444    }
    4545}
    46                
     46
    4747void P2SKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
    4848    const auto numOfStreams = getInputStreamSet("basisBits")->getNumElements();
     
    146146    }
    147147}
    148    
     148
    149149void P2S16KernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
    150150    IntegerType * i32Ty = b->getInt32Ty();
     
    153153    ConstantInt * blockMask = b->getSize(b->getBitBlockWidth() - 1);
    154154    unsigned const unitsPerRegister = b->getBitBlockWidth()/16;
    155    
     155
    156156    Value * hi_input[8];
    157157    for (unsigned j = 0; j < 8; ++j) {
     
    202202
    203203
    204 P2SKernel::P2SKernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * basisBits, StreamSet * byteStream, cc::BitNumbering numbering)
    205 : BlockOrientedKernel("p2s" + cc::numberingSuffix(numbering),
     204P2SKernel::P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * basisBits, StreamSet * byteStream, cc::BitNumbering numbering)
     205: BlockOrientedKernel(b, "p2s" + cc::numberingSuffix(numbering),
    206206{Binding{"basisBits", basisBits}},
    207207{Binding{"byteStream", byteStream}},
     
    215215                                                   StreamSet * const outputStream,
    216216                                                   cc::BitNumbering basisNumbering)
    217 : BlockOrientedKernel("p2sMultipleStreams" + cc::numberingSuffix(basisNumbering),
     217: BlockOrientedKernel(b, "p2sMultipleStreams" + cc::numberingSuffix(basisNumbering),
    218218{},
    219219{Binding{"byteStream", outputStream}},
     
    226226
    227227P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering numbering)
    228 : BlockOrientedKernel("p2s_compress" + cc::numberingSuffix(numbering),
     228: BlockOrientedKernel(b, "p2s_compress" + cc::numberingSuffix(numbering),
    229229{Binding{b->getStreamSetTy(8, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "extractionMask"}},
    230230{Binding{b->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}},
     
    234234}
    235235
    236 P2S16Kernel::P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet *u16bits, StreamSet *u16bytes, cc::BitNumbering numbering)
    237 : BlockOrientedKernel("p2s_16" + cc::numberingSuffix(numbering),
     236P2S16Kernel::P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet *u16bits, StreamSet *u16bytes, cc::BitNumbering numbering)
     237: BlockOrientedKernel(b, "p2s_16" + cc::numberingSuffix(numbering),
    238238{Binding{"basisBits", u16bits}},
    239239{Binding{"i16Stream", u16bytes}},
     
    243243}
    244244
    245 P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> &,
     245P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b,
    246246                                                                 StreamSet * basisBits, StreamSet * extractionMask, StreamSet * i16Stream,
    247247                                                                 cc::BitNumbering numbering)
    248 : BlockOrientedKernel("p2s_16_compress" + cc::numberingSuffix(numbering),
     248: BlockOrientedKernel(b, "p2s_16_compress" + cc::numberingSuffix(numbering),
    249249{Binding{"basisBits", basisBits},
    250250Binding{"extractionMask", extractionMask}},
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r6199 r6261  
    1414class P2SKernel final : public BlockOrientedKernel {
    1515public:
    16     P2SKernel(const std::unique_ptr<kernel::KernelBuilder> &,
     16    P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & b,
    1717              StreamSet * basisBits,
    1818              StreamSet * byteStream,
     
    5151class P2S16Kernel final : public BlockOrientedKernel {
    5252public:
    53     P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * u16bits, StreamSet * u16bytes, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
     53    P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> &b, StreamSet * u16bits, StreamSet * u16bytes, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
    5454    bool isCachable() const override { return true; }
    5555    bool hasSignature() const override { return false; }
     
    5858    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    5959};
    60    
     60
    6161class P2S16KernelWithCompressedOutput final : public BlockOrientedKernel {
    6262public:
    63     P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> &,
     63    P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b,
    6464                                    StreamSet * basisBits, StreamSet * fieldCounts, StreamSet * i16Stream,
    6565                                    cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
     
    7070    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    7171};
    72    
     72
    7373}
    7474
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r6258 r6261  
    1919
    2020PDEPkernel::PDEPkernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned swizzleFactor, std::string name)
    21 : MultiBlockKernel(std::move(name),
     21: MultiBlockKernel(b, std::move(name),
    2222// input stream sets
    2323{Binding{b->getStreamSetTy(), "marker", FixedRate(), Principal()},
     
    146146                                       , StreamSet * expanded
    147147                                       , const unsigned FieldWidth)
    148 : MultiBlockKernel("streamExpand" + std::to_string(FieldWidth)
     148: MultiBlockKernel(b, "streamExpand" + std::to_string(FieldWidth)
    149149+ "_" + std::to_string(source->getNumElements())
    150150+ "_" + std::to_string(base) + "_" + std::to_string(expanded->getNumElements()),
     
    269269}
    270270
    271 FieldDepositKernel::FieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> &
     271FieldDepositKernel::FieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> & b
    272272                                       , StreamSet * mask, StreamSet * input, StreamSet * output
    273273                                       , const unsigned fieldWidth)
    274 : MultiBlockKernel("FieldDeposit" + std::to_string(fieldWidth) + "_" + std::to_string(input->getNumElements()),
     274: MultiBlockKernel(b, "FieldDeposit" + std::to_string(fieldWidth) + "_" + std::to_string(input->getNumElements()),
    275275{Binding{"depositMask", mask}
    276276, Binding{"inputStreamSet", input}},
     
    304304}
    305305
    306 PDEPFieldDepositKernel::PDEPFieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> &
     306PDEPFieldDepositKernel::PDEPFieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> & b
    307307                                               , StreamSet * mask, StreamSet * input, StreamSet * output
    308308                                               , const unsigned fieldWidth)
    309 : MultiBlockKernel("PDEPFieldDeposit" + std::to_string(fieldWidth) + "_" + std::to_string(input->getNumElements()) ,
     309: MultiBlockKernel(b, "PDEPFieldDeposit" + std::to_string(fieldWidth) + "_" + std::to_string(input->getNumElements()) ,
    310310                   {Binding{"depositMask", mask},
    311311                    Binding{"inputStreamSet", input}},
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/buffer_management_logic.hpp

    r6258 r6261  
    6464}
    6565
    66 inline BufferRateData PipelineCompiler::getBufferRateData(const unsigned index, const unsigned port, bool input) {
    67     const Kernel * kernel = nullptr;
    68     if (index < mPipeline.size()) {
    69         kernel = mPipeline[index];
    70     } else {
    71         assert (index == mPipeline.size());
    72         kernel = mPipelineKernel;
    73         input = !input;
    74     }
    75     const Binding & binding = input ? kernel->getInputStreamSetBinding(port) : kernel->getOutputStreamSetBinding(port);
     66BufferRateData PipelineCompiler::getBufferRateData(const Kernel * const kernel, const Binding & binding, const unsigned port) const {
    7667    const auto ub = upperBound(kernel, binding);
    7768    const auto lb = isConsistentRate(kernel, binding) ? ub : lowerBound(kernel, binding);
     
    8172void PipelineCompiler::enumerateBufferProducerBindings(const unsigned producer, const Bindings & bindings, BufferGraph & G, BufferMap & M) {
    8273    const auto n = bindings.size();
     74    const Kernel * const kernel = mPipeline[producer];
    8375    for (unsigned i = 0; i < n; ++i) {
    8476        const StreamSet * const rel = cast<StreamSet>(getRelationship(bindings[i]));
    8577        assert (M.count(rel) == 0);
    8678        const auto buffer = add_vertex(G);
    87         add_edge(producer, buffer, getBufferRateData(producer, i, false), G); // producer -> buffer ordering
     79        add_edge(producer, buffer, getBufferRateData(kernel, bindings[i], i), G); // producer -> buffer ordering
    8880        M.emplace(rel, buffer);
    8981    }
     
    9284void PipelineCompiler::enumerateBufferConsumerBindings(const unsigned consumer, const Bindings & bindings, BufferGraph & G, BufferMap & M) {
    9385    const auto n = bindings.size();
     86    const Kernel * const kernel = mPipeline[consumer];
    9487    for (unsigned i = 0; i < n; ++i) {
    9588        const StreamSet * const rel = cast<StreamSet>(getRelationship(bindings[i]));
    9689        const auto f = M.find(rel); assert (f != M.end());
    9790        const auto buffer = f->second;
    98         add_edge(buffer, consumer, getBufferRateData(consumer, i, true), G); // buffer -> consumer ordering
     91        add_edge(buffer, consumer, getBufferRateData(kernel, bindings[i], i), G); // buffer -> consumer ordering
    9992    }
    10093}
     
    10396 * @brief makePipelineBufferGraph
    10497 *
    105  * Return a cyclic bi-partite graph indicating the I/O relationships between the kernels and their buffers.
     98 * Return an acyclic bi-partite graph indicating the I/O relationships between the kernels and their buffers.
    10699 *
    107100 * Ordering: producer -> buffer -> consumer
     
    109102BufferGraph PipelineCompiler::makeBufferGraph(BuilderRef b) {
    110103
    111     const auto numOfKernels = mPipeline.size();
    112     const auto pipelineVertex = numOfKernels;
    113     BufferGraph G(numOfKernels + 1);
     104    const auto pipelineInput = 0;
     105    const auto pipelineOutput = mLastKernel;
     106    const auto firstBuffer = mLastKernel + 1;
     107
     108    BufferGraph G(mLastKernel + 1);
    114109    BufferMap M;
    115110
    116     // make an edge from the pipeline input to a buffer vertex
    117     enumerateBufferProducerBindings(pipelineVertex, mPipelineKernel->getInputStreamSetBindings(), G, M);
    118     G[pipelineVertex].Kernel = mPipelineKernel;
    119111    // make an edge from each producing kernel to a buffer vertex
    120     for (unsigned i = 0; i < numOfKernels; ++i) {
    121         const auto & producer = mPipeline[i];
    122         enumerateBufferProducerBindings(i, producer->getOutputStreamSetBindings(), G, M);
    123         G[i].Kernel = producer;
     112    enumerateBufferProducerBindings(pipelineInput, mPipelineKernel->getInputStreamSetBindings(), G, M);
     113    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     114        enumerateBufferProducerBindings(i, mPipeline[i]->getOutputStreamSetBindings(), G, M);
    124115    }
    125116    // make an edge from each buffer to its consuming kernel(s)
    126     for (unsigned i = 0; i < numOfKernels; ++i) {
    127         const auto & consumer = mPipeline[i];
    128         enumerateBufferConsumerBindings(i, consumer->getInputStreamSetBindings(), G, M);
     117    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     118        enumerateBufferConsumerBindings(i, mPipeline[i]->getInputStreamSetBindings(), G, M);
    129119    }
    130120    // make an edge from a buffer vertex to each pipeline output
    131     enumerateBufferConsumerBindings(pipelineVertex, mPipelineKernel->getOutputStreamSetBindings(), G, M);
    132 
    133     const auto firstBuffer = numOfKernels + 1;
     121    enumerateBufferConsumerBindings(pipelineOutput, mPipelineKernel->getOutputStreamSetBindings(), G, M);
     122
    134123    const auto lastBuffer = num_vertices(G);
    135124
     
    139128
    140129    // compute how much data each kernel could consume/produce per iteration.
    141     for (unsigned i = 0; i < numOfKernels; ++i) {
     130    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    142131        if (LLVM_LIKELY(in_degree(i, G) > 0)) {
    143132
     
    146135
    147136            BufferNode & kn = G[i];
    148             assert(kn.Kernel);
    149137
    150138            for (const auto ce : make_iterator_range(in_edges(i, G))) {
     
    176164
    177165    // fill in any known pipeline I/O buffers
    178     for (const auto e : make_iterator_range(out_edges(pipelineVertex, G))) {
     166    for (const auto e : make_iterator_range(out_edges(pipelineInput, G))) {
    179167        const auto bufferVertex = target(e, G);
    180168        BufferNode & bn = G[bufferVertex];
     
    185173    }
    186174
    187     for (const auto e : make_iterator_range(in_edges(pipelineVertex, G))) {
     175    for (const auto e : make_iterator_range(in_edges(pipelineOutput, G))) {
    188176        const auto bufferVertex = source(e, G);
    189177        BufferNode & bn = G[bufferVertex];
     
    208196
    209197
    210         const BufferNode & producerNode = G[producerVertex];
    211         const auto & producer = producerNode.Kernel;
     198        const Kernel * const producer = mPipeline[producerVertex];
    212199        const BufferRateData & producerRate = G[pe];
    213200        const Binding & output = getOutputBinding(producer, producerRate.Port);
     
    236223                const auto c = target(ce, G);
    237224                const BufferNode & consumerNode = G[c];
    238                 const Kernel * const consumer = consumerNode.Kernel; assert (consumer);
     225                const Kernel * const consumer = mPipeline[c]; assert (consumer);
    239226                const Binding & input = getInputBinding(consumer, consumerRate.Port);
    240227                facsimileSpace = std::max(facsimileSpace, getOutputOverflowSize(consumer, input, consumerRate));
     
    285272void PipelineCompiler::printBufferGraph(const BufferGraph & G, raw_ostream & out) {
    286273
    287     const auto numOfKernels = mPipeline.size();
    288     const auto firstBuffer = numOfKernels + 1;
     274    const auto pipelineInput = 0;
     275    const auto pipelineOutput = mLastKernel;
     276    const auto firstBuffer = mLastKernel + 1;
    289277    const auto lastBuffer = num_vertices(G);
    290278
    291279    out << "digraph G {\n";
    292280
    293     for (unsigned i = 0; i < numOfKernels; ++i) {
     281    out << "v" << pipelineInput << " [label=\"P_{in}\" shape=box];\n";
     282
     283    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    294284        const Kernel * const kernel = mPipeline[i]; assert(kernel);
    295         out << "v" << i << " [label=\"" << i << ": " <<  kernel->getName()  << "\" shape=box];\n";
    296     }
    297 
    298     out << "v" << numOfKernels << " [label=\"" << numOfKernels << ": pipeline\" shape=box];\n";
     285        out << "v" << i << " [label=\"" << i << ": K_{" << i << "}  " <<  kernel->getName()  << "\" shape=box];\n";
     286    }
     287
     288    out << "v" << pipelineOutput << " [label=\"P_{out}\" shape=box];\n";
    299289
    300290    for (unsigned i = firstBuffer; i != lastBuffer; ++i) {
     
    305295        } else if (isa<ExternalBuffer>(bn.Buffer)) {
    306296            out << 'E';
    307         } else if (isa<DynamicBuffer>(bn.Buffer)) {
    308             out << 'D';
    309         } else if (isa<StaticBuffer>(bn.Buffer)) {
    310             out << 'S';
     297        } else if (DynamicBuffer * buffer = dyn_cast<DynamicBuffer>(bn.Buffer)) {
     298            out << 'D' << buffer->getInitialCapacity() << 'x' << buffer->getNumOfStreams();
     299        } else if (StaticBuffer * buffer = dyn_cast<StaticBuffer>(bn.Buffer)) {
     300            out << 'S' << buffer->getCapacity() << 'x' << buffer->getNumOfStreams();
    311301        }
    312302        if (bn.Overflow || bn.Fasimile) {
     
    315305        out << "\"];\n";
    316306    }
    317 
    318     std::vector<Kernel *> P(mPipeline);
    319     P.push_back(mPipelineKernel);
    320307
    321308    for (auto e : make_iterator_range(edges(G))) {
     
    335322        out << '\n';
    336323
    337         if (s <= numOfKernels) {
    338             // producer edge
    339             const Kernel * const k = G[s].Kernel;
     324        if (s < t) { // producer edge
     325            const Kernel * const k = mPipeline[s];
    340326            out << k->getName() << "." << getOutputBinding(k, pd.Port).getName();
    341         } else {
    342             assert (t <= numOfKernels);
    343             const Kernel * const k = G[t].Kernel;
     327        } else { // consumer edge
     328            const Kernel * const k = mPipeline[t];
    344329            out << k->getName() << "." << getInputBinding(k, pd.Port).getName();
    345330        }
     
    378363void PipelineCompiler::constructBuffers(BuilderRef b) {
    379364
    380     const auto numOfKernels = mPipeline.size();
    381     const auto firstBuffer = numOfKernels + 1;
     365    const auto firstBuffer = mLastKernel + 1;
    382366    const auto lastBuffer = num_vertices(mBufferGraph);
    383367
     
    389373            const auto pe = in_edge(i, mBufferGraph);
    390374            const auto p = source(pe, mBufferGraph);
    391             const auto & producer = mPipeline[p];
     375            const Kernel * const producer = mPipeline[p];
    392376            const Binding & output = producer->getOutputStreamSetBinding(mBufferGraph[pe].Port);
    393377            const auto name = makeBufferName(p, output);
     
    429413 ** ------------------------------------------------------------------------------------------------------------- */
    430414inline void PipelineCompiler::releaseBuffers(BuilderRef b) {
    431     const auto firstBuffer = mPipeline.size() + 1;
     415    const auto firstBuffer = mLastKernel + 1;
    432416    const auto lastBuffer = num_vertices(mBufferGraph);
    433417    for (auto bufferVertex = firstBuffer; bufferVertex != lastBuffer; ++bufferVertex) {
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/consumer_logic.hpp

    r6253 r6261  
    9292Value * PipelineCompiler::getConsumedItemCount(BuilderRef b, const unsigned outputPort) {
    9393    Value * consumed = nullptr;
    94     const auto bufferVertex = getOutputBufferVertex(mKernelIndex, outputPort);
     94    const auto bufferVertex = getOutputBufferVertex(outputPort);
    9595    if (LLVM_UNLIKELY(out_degree(bufferVertex, mConsumerGraph) == 0)) {
    9696        // This stream either has no consumers or we've proven that its consumption rate
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6258 r6261  
    1 #include "pipeline_compiler.hpp"
     1#include "pipeline_compiler.hpp"
    22
    33namespace kernel {
    4 
    5 /** ------------------------------------------------------------------------------------------------------------- *
    6  * @brief addPipelineKernelProperties
    7  ** ------------------------------------------------------------------------------------------------------------- */
    8 inline void PipelineCompiler::addPipelineKernelProperties(BuilderRef b) {
    9     initializePopCounts();
    10     const auto numOfKernels = mPipeline.size();
    11     b->setKernel(mPipelineKernel);
    12     for (unsigned i = 0; i < numOfKernels; ++i) {
    13         addBufferHandlesToPipelineKernel(b, i);
    14         addInternalKernelProperties(b, i);
    15         addConsumerKernelProperties(b, i);
    16         addPopCountScalarsToPipelineKernel(b, i);
    17     }
    18     b->setKernel(mPipelineKernel);
    19 }
    20 
    21 /** ------------------------------------------------------------------------------------------------------------- *
    22  * @brief addInternalKernelProperties
    23  ** ------------------------------------------------------------------------------------------------------------- */
    24 inline void PipelineCompiler::addInternalKernelProperties(BuilderRef b, const unsigned kernelIndex) {
    25 
    26     IntegerType * const sizeTy = b->getSizeTy();
    27 
    28     const auto name = makeKernelName(kernelIndex);
    29     // TODO: prove two termination signals can be fused into a single counter?
    30     mPipelineKernel->addInternalScalar(sizeTy, name + TERMINATION_SIGNAL_SUFFIX);
    31     mPipelineKernel->addInternalScalar(sizeTy, name + LOGICAL_SEGMENT_SUFFIX);
    32 
    33     // TODO: non deferred item count for fixed rates could be calculated from total # of segments.
    34     const Kernel * const kernel = mPipeline[kernelIndex];
    35     const auto numOfInputs = kernel->getNumOfStreamInputs();
    36     for (unsigned i = 0; i < numOfInputs; i++) {
    37         const Binding & input = kernel->getInputStreamSetBinding(i);
    38         const auto prefix = makeBufferName(kernelIndex, input);
    39         if (input.isDeferred()) {
    40             mPipelineKernel->addInternalScalar(sizeTy, prefix + DEFERRED_ITEM_COUNT_SUFFIX);
    41         }
    42         mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
    43     }
    44 
    45     const auto numOfOutputs = kernel->getNumOfStreamOutputs();
    46     for (unsigned i = 0; i < numOfOutputs; i++) {
    47         const Binding & output = kernel->getOutputStreamSetBinding(i);
    48         const auto prefix = makeBufferName(kernelIndex, output);
    49         mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
    50     }
    51 
    52 }
    53 
    54 /** ------------------------------------------------------------------------------------------------------------- *
    55  * @brief generateInitializeMethod
    56  ** ------------------------------------------------------------------------------------------------------------- */
    57 void PipelineCompiler::generateInitializeMethod(BuilderRef b) {
    58     const auto numOfKernels = mPipeline.size();
    59     for (unsigned i = 0; i < numOfKernels; ++i) {
    60         mPipeline[i]->addKernelDeclarations(b);
    61     }
    62     for (unsigned i = 0; i < numOfKernels; ++i) {
    63         Kernel * const kernel = mPipeline[i];
    64         if (!kernel->hasFamilyName()) {
    65             Value * const handle = kernel->createInstance(b);
    66             b->setScalarField(makeKernelName(i), handle);
    67         }
    68     }
    69     constructBuffers(b);
    70     std::vector<Value *> args;
    71     for (unsigned i = 0; i < numOfKernels; ++i) {
    72         setActiveKernel(b, i);
    73         args.resize(in_degree(i, mScalarDependencyGraph) + 1);
    74         #ifndef NDEBUG
    75         std::fill(args.begin(), args.end(), nullptr);
    76         #endif
    77         args[0] = mKernel->getHandle();
    78         b->setKernel(mPipelineKernel);
    79         for (const auto ce : make_iterator_range(in_edges(i, mScalarDependencyGraph))) {
    80             const auto j = mScalarDependencyGraph[ce] + 1;
    81             const auto pe = in_edge(source(ce, mScalarDependencyGraph), mScalarDependencyGraph);
    82             const auto k = mScalarDependencyGraph[pe];
    83             const Binding & input = mPipelineKernel->getInputScalarBinding(k);
    84             assert (args[j] == nullptr);
    85             args[j] = b->getScalarField(input.getName());
    86         }
    87         b->setKernel(mKernel);
    88         Value * const terminatedOnInit = b->CreateCall(getInitializationFunction(b), args);
    89         if (mKernel->canSetTerminateSignal()) {
    90             setTerminated(b, terminatedOnInit);
    91         }
    92     }
    93 }
    944
    955/** ------------------------------------------------------------------------------------------------------------- *
     
    10818
    10919    b->SetInsertPoint(mPipelineLoop);
    110     mSegNo = b->CreatePHI(b->getSizeTy(), 2, "segNo");
     20    IntegerType * const sizeTy = b->getSizeTy();
     21    ConstantInt * const ZERO = b->getSize(0);
     22    ConstantInt * const NOT_TERMINATED = b->getSize(NotTerminated);
     23
     24    mSegNo = b->CreatePHI(sizeTy, 2, "segNo");
    11125    mSegNo->addIncoming(initialSegNo, entryBlock);
    112     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    113         mDeadLockCounter = b->CreatePHI(b->getSizeTy(), 2, "deadLockCounter");
    114         mDeadLockCounter->addIncoming(b->getSize(0), entryBlock);
    115         mPipelineProgress = b->getFalse();
    116     }
     26    mProgressCounter = b->CreatePHI(sizeTy, 2, "progressCounter");
     27    mProgressCounter->addIncoming(ZERO, entryBlock);
     28    mPipelineProgress = b->getFalse();
     29    // any pipeline input streams are considered produced by the P_{in} vertex.
     30    mTerminationGraph[0] = mPipelineKernel->isFinal();
     31
     32    mPipelineTerminated = NOT_TERMINATED;
    11733    #ifdef PRINT_DEBUG_MESSAGES
    11834    b->CallPrintInt("+++ pipeline start +++", mSegNo);
     
    210126    b->SetInsertPoint(mKernelLoopCall);
    211127    writeKernelCall(b);
    212 
    213     BasicBlock * const copyBack =
    214             b->CreateBasicBlock(prefix + "_copyBack", mKernelTerminationCheck);
     128    writeCopyBackLogic(b);
     129
    215130    BasicBlock * const abnormalTermination =
    216131            b->CreateBasicBlock(prefix + "_abnormalTermination", mKernelTerminationCheck);
     
    218133    // If the kernel explicitly terminates, it must set its processed/produced item counts.
    219134    // Otherwise, the pipeline will update any countable rates, even upon termination.
    220     b->CreateUnlikelyCondBr(mTerminationExplicitly, abnormalTermination, copyBack);
    221 
    222     /// -------------------------------------------------------------------------------------
    223     /// KERNEL COPY BACK
    224     /// -------------------------------------------------------------------------------------
    225 
    226     b->SetInsertPoint(copyBack);
    227     writeCopyBackLogic(b);
    228     b->CreateBr(mKernelTerminationCheck);
     135    b->CreateUnlikelyCondBr(mTerminatedExplicitly, abnormalTermination, mKernelTerminationCheck);
    229136
    230137    /// -------------------------------------------------------------------------------------
     
    249156    b->SetInsertPoint(mKernelTerminated);
    250157    zeroFillPartiallyWrittenOutputStreams(b);
    251     setTerminated(b, b->getTrue());
    252     updatePhisAfterTermination(b);
     158    Value * mode = setTerminated(b, mTerminatedExplicitly, TerminatedExplicitly, TerminatedNormally);
     159    updatePhisAfterTermination(b, mode);
    253160    b->CreateBr(mKernelLoopExit);
    254161
     
    289196void PipelineCompiler::end(BuilderRef b, const unsigned step) {
    290197    b->setKernel(mPipelineKernel);
    291     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    292         ConstantInt * const ZERO = b->getSize(0);
    293         ConstantInt * const ONE = b->getSize(1);
    294         ConstantInt * const TWO = b->getSize(2);
    295         Value * const plusOne = b->CreateAdd(mDeadLockCounter, ONE);
    296         Value * const newCount = b->CreateSelect(mPipelineProgress, ZERO, plusOne);
    297         b->CreateAssert(b->CreateICmpNE(newCount, TWO),
    298                         "Dead lock detected: pipeline could not progress after two iterations");
    299         mDeadLockCounter->addIncoming(newCount, b->GetInsertBlock());
    300     }
     198
     199    ConstantInt * const ZERO = b->getSize(0);
     200    ConstantInt * const ONE = b->getSize(1);
     201    ConstantInt * const TWO = b->getSize(2);
     202    Value * const plusOne = b->CreateAdd(mProgressCounter, ONE);
     203    Value * const newProgressCounter = b->CreateSelect(mPipelineProgress, ZERO, plusOne);
     204    Value * const noProgress = b->CreateICmpEQ(newProgressCounter, TWO);
     205
     206    const auto pipelineOutput = mLastKernel;
     207
    301208    // check whether every sink has terminated
    302209    Value * allTerminated = b->getTrue();
    303     const auto pipelineOutputVertex = mPipeline.size();
    304     for (const auto e : make_iterator_range(in_edges(pipelineOutputVertex, mTerminationGraph))) {
    305         const auto u = source(e, mTerminationGraph);
    306         assert (mTerminationGraph[u]);
    307         allTerminated = b->CreateAnd(allTerminated, mTerminationGraph[u]);
    308     }
    309     // or if any output stream of this pipeline cannot support a full stride
    310     Value * notEnoughSpace = b->getFalse();
    311     for (const auto e : make_iterator_range(in_edges(pipelineOutputVertex, mBufferGraph))) {
    312 
    313         const auto bufferVertex = source(e, mBufferGraph);
    314         const BufferNode & bn = mBufferGraph[bufferVertex];
    315         const ExternalBuffer * const buffer = cast<ExternalBuffer>(bn.Buffer);
    316 
    317         Value * const produced = bn.TotalItems; assert (produced);
    318         Value * const consumed = b->getSize(0);  assert (consumed);
    319         Value * const writable = buffer->getLinearlyWritableItems(b, produced, consumed);
    320 
    321         const BufferRateData & rd = mBufferGraph[e];
    322         const auto outputPort = rd.Port;
    323 
    324         // NOTE: this method doesn't check a popcount's ref stream to determine how many
    325         // items we actually require. Instead it just calculates them as bounded rates.
    326         // To support a precise bound, we'd need to produce more ref items than the kernel
    327         // that writes to this output actually consumes. Since this effectively adds a
    328         // delay equivalent to a LookAhead of a full stride, this doesn't seem useful.
    329         const Binding & output = mPipelineKernel->getOutputStreamSetBinding(outputPort);
    330         Value * const strideLength = getMaximumStrideLength(b, mPipelineKernel, output);
    331         notEnoughSpace = b->CreateOr(b->CreateICmpULT(writable, strideLength), notEnoughSpace);
    332     }
    333     b->setKernel(mPipelineKernel);
    334     Value * const done = b->CreateOr(allTerminated, notEnoughSpace);
     210    for (const auto e : make_iterator_range(in_edges(pipelineOutput, mTerminationGraph))) {
     211        const auto kernelVertex = source(e, mTerminationGraph);
     212        Value * const kernelState = mTerminationGraph[kernelVertex];
     213        Value * const terminated = b->CreateICmpNE(kernelState, b->getSize(NotTerminated));
     214        allTerminated = b->CreateAnd(allTerminated, terminated);
     215    }
     216
     217    Value * done = allTerminated;
     218    if (nestedPipeline()) {
     219        done = b->CreateOr(allTerminated, noProgress);
     220    } else if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     221        b->CreateAssertZero(noProgress,
     222            "Dead lock detected: pipeline could not progress after two iterations");
     223    }
     224
    335225    #ifdef PRINT_DEBUG_MESSAGES
    336226    Constant * const ONES = Constant::getAllOnesValue(mSegNo->getType());
     
    339229
    340230    Value * const nextSegNo = b->CreateAdd(mSegNo, b->getSize(step));
    341     mSegNo->addIncoming(nextSegNo, b->GetInsertBlock());
     231    BasicBlock * const exitBlock = b->GetInsertBlock();
     232    mSegNo->addIncoming(nextSegNo, exitBlock);
     233    mProgressCounter->addIncoming(newProgressCounter, exitBlock);
    342234    b->CreateUnlikelyCondBr(done, mPipelineEnd, mPipelineLoop);
    343235
    344236    b->SetInsertPoint(mPipelineEnd);
    345237    mSegNo = nullptr;
    346 }
    347 
    348 /** ------------------------------------------------------------------------------------------------------------- *
    349  * @brief generateFinalizeMethod
    350  ** ------------------------------------------------------------------------------------------------------------- */
    351 void PipelineCompiler::generateFinalizeMethod(BuilderRef b) {
    352     printOptionalCycleCounter(b);
    353     const auto numOfKernels = mPipeline.size();
    354     mOutputScalars.resize(numOfKernels);
    355     for (unsigned i = 0; i < numOfKernels; ++i) {
    356         setActiveKernel(b, i);
    357         loadBufferHandles(b);
    358         mOutputScalars[i] = b->CreateCall(getFinalizeFunction(b), mKernel->getHandle());
    359     }
    360     releaseBuffers(b);
    361 }
    362 
    363 /** ------------------------------------------------------------------------------------------------------------- *
    364  * @brief writeOutputScalars
    365  ** ------------------------------------------------------------------------------------------------------------- */
    366 void PipelineCompiler::writeOutputScalars(BuilderRef b, const unsigned u, std::vector<Value *> & args) {
    367     args.clear();
    368     const auto n = in_degree(u, mScalarDependencyGraph);
    369     args.resize(n, nullptr);
    370     const auto numOfKernels = mPipeline.size();
    371     for (const auto e : make_iterator_range(in_edges(u, mScalarDependencyGraph))) {
    372         const auto bufferVertex = source(e, mScalarDependencyGraph);
    373         if (LLVM_LIKELY(mScalarDependencyGraph[bufferVertex] == nullptr)) {
    374             const auto producer = in_edge(source(e, mScalarDependencyGraph), mScalarDependencyGraph);
    375             const auto i = source(producer, mScalarDependencyGraph);
    376             const auto j = mScalarDependencyGraph[producer];
    377             Value * val = nullptr;
    378             if (i == numOfKernels) {
    379                 const Binding & input = mPipelineKernel->getInputScalarBinding(j);
    380                 val = b->getScalarField(input.getName());
    381             } else { // output scalar of some kernel
    382                 Value * const outputScalars = mOutputScalars[i]; assert (outputScalars);
    383                 if (outputScalars->getType()->isAggregateType()) {
    384                     val = b->CreateExtractValue(outputScalars, {j});
    385                 } else { assert (j == 0 && "scalar type is not an aggregate");
    386                     val = outputScalars;
    387                 }
    388             }
    389             mScalarDependencyGraph[bufferVertex] = val;
    390         }
    391         const auto k = mScalarDependencyGraph[e];
    392         assert (args[k] == nullptr);
    393         args[k] = mScalarDependencyGraph[bufferVertex];
    394     }
    395 }
    396 
    397 /** ------------------------------------------------------------------------------------------------------------- *
    398  * @brief getFinalOutputScalars
    399  ** ------------------------------------------------------------------------------------------------------------- */
    400 std::vector<Value *> PipelineCompiler::getFinalOutputScalars(BuilderRef b) {
    401     const auto numOfKernels = mPipeline.size();
    402     const auto & calls = mPipelineKernel->getCallBindings();
    403     const auto numOfCalls = calls.size();
    404     std::vector<Value *> args;
    405238    b->setKernel(mPipelineKernel);
    406     for (unsigned k = 0; k < numOfCalls; ++k) {
    407         writeOutputScalars(b, numOfKernels + k + 1, args);
    408         Function * const f = cast<Function>(calls[k].Callee);
    409         auto i = f->arg_begin();
    410         for (auto j = args.begin(); j != args.end(); ++i, ++j) {
    411             assert (i != f->arg_end());
    412             *j = b->CreateZExtOrTrunc(*j, i->getType());
    413         }
    414         assert (i == f->arg_end());
    415         b->CreateCall(f, args);
    416     }
    417     writeOutputScalars(b, numOfKernels, args);
    418     return args;
     239
     240// TODO: not correct for threaded pipelines
     241//    if (mPipelineKernel->canSetTerminateSignal()) {
     242//        Value * const terminatedPtr = mPipelineKernel->getTerminationSignalPtr();
     243//        b->CreateStore(allTerminated, terminatedPtr);
     244//    }
    419245}
    420246
     
    445271    // Since we may loop and call the kernel again, we want to mark that we've progressed
    446272    // if we execute any kernel even if we could not complete a full segment.
    447     if (mPipelineProgress) {
    448         const auto prefix = makeKernelName(mKernelIndex);
    449         mAlreadyProgressedPhi = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_madeProgress");
    450         mAlreadyProgressedPhi->addIncoming(mPipelineProgress, mKernelEntry);
    451     }
     273    const auto prefix = makeKernelName(mKernelIndex);
     274    mAlreadyProgressedPhi = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_madeProgress");
     275    mAlreadyProgressedPhi->addIncoming(mPipelineProgress, mKernelEntry);
    452276}
    453277
     
    498322    b->SetInsertPoint(mKernelLoopExit);
    499323    const auto prefix = makeKernelName(mKernelIndex);
    500     mTerminatedPhi = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_terminated");
    501     if (mPipelineProgress) {
    502         mHasProgressedPhi = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_anyProgress");
    503     }
    504     Type * const sizeTy = b->getSizeTy();
     324    IntegerType * const sizeTy = b->getSizeTy();
     325    mTerminatedPhi = b->CreatePHI(sizeTy, 2, prefix + "_terminated");
     326    mHasProgressedPhi = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_anyProgress");
     327
    505328    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    506329    for (unsigned i = 0; i < numOfInputs; ++i) {
     
    526349    b->SetInsertPoint(mKernelExit);
    527350    const auto prefix = makeKernelName(mKernelIndex);
    528     PHINode * const terminated = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_terminated");
    529     terminated->addIncoming(b->getTrue(), mKernelEntry);
     351    IntegerType * const sizeTy = b->getSizeTy();
     352
     353    PHINode * const terminated = b->CreatePHI(sizeTy, 2, prefix + "_terminated");
     354    terminated->addIncoming(mTerminatedInitially, mKernelEntry);
    530355    terminated->addIncoming(mTerminatedPhi, mKernelLoopExitPhiCatch);
    531356    mTerminationGraph[mKernelIndex] = terminated;
    532     if (mPipelineProgress) {
    533         PHINode * const pipelineProgress = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_pipelineProgress");
    534         pipelineProgress->addIncoming(mPipelineProgress, mKernelEntry);
    535         pipelineProgress->addIncoming(mHasProgressedPhi, mKernelLoopExitPhiCatch);
    536         mPipelineProgress = pipelineProgress;
    537     }
     357
     358    PHINode * const pipelineProgress = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_pipelineProgress");
     359    pipelineProgress->addIncoming(mPipelineProgress, mKernelEntry);
     360    pipelineProgress->addIncoming(mHasProgressedPhi, mKernelLoopExitPhiCatch);
     361    mPipelineProgress = pipelineProgress;
     362
    538363    createConsumedPhiNodes(b);
    539364    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
    540     Type * const sizeTy = b->getSizeTy();
    541365    for (unsigned i = 0; i < numOfOutputs; ++i) {
    542366        const Binding & output = mKernel->getOutputStreamSetBinding(i);
     
    584408            mUpdatedProducedPhi[i]->addIncoming(mProducedItemCount[i], entryBlock);
    585409        }
    586         if (mHasProgressedPhi) {
    587             mHasProgressedPhi->addIncoming(b->getTrue(), entryBlock);
    588         }
    589         mTerminatedPhi->addIncoming(b->getFalse(), entryBlock);
     410        mHasProgressedPhi->addIncoming(b->getTrue(), entryBlock);
     411        mTerminatedPhi->addIncoming(b->getSize(NotTerminated), entryBlock);
    590412        b->CreateBr(mKernelLoopExit);
    591413    }
     
    595417 * @brief getInitialTerminationSignal
    596418 ** ------------------------------------------------------------------------------------------------------------- */
    597 inline Value * PipelineCompiler::initiallyTerminated(BuilderRef b) const {
     419inline Value * PipelineCompiler::initiallyTerminated(BuilderRef b) {
    598420    b->setKernel(mPipelineKernel);
    599421    const auto prefix = makeKernelName(mKernelIndex);
    600     Value * const terminated = b->getScalarField(prefix + TERMINATION_SIGNAL_SUFFIX);
     422    mTerminatedInitially = b->getScalarField(prefix + TERMINATION_SIGNAL_SUFFIX);
    601423    b->setKernel(mKernel);
    602     return b->CreateICmpNE(terminated, b->getSize(0));
     424    return b->CreateICmpNE(mTerminatedInitially, b->getSize(NotTerminated));
    603425}
    604426
     
    606428 * @brief setTerminated
    607429 ** ------------------------------------------------------------------------------------------------------------- */
    608 inline void PipelineCompiler::setTerminated(BuilderRef b, Value * const value) {
     430Value * PipelineCompiler::setTerminated(BuilderRef b, Value * const condition, const TerminationMode trueMode, const TerminationMode falseMode) const {
    609431    const auto prefix = makeKernelName(mKernelIndex);
    610432    b->setKernel(mPipelineKernel);
    611     b->setScalarField(prefix + TERMINATION_SIGNAL_SUFFIX, b->CreateZExtOrTrunc(value, b->getSizeTy()));
     433    ConstantInt * const TRUE_MODE = b->getSize(trueMode);
     434    ConstantInt * const FALSE_MODE = b->getSize(falseMode);
     435    Value * const mode = b->CreateSelect(condition, TRUE_MODE, FALSE_MODE);
     436    b->setScalarField(prefix + TERMINATION_SIGNAL_SUFFIX, mode);
    612437    #ifdef PRINT_DEBUG_MESSAGES
    613     b->CallPrintInt("*** " + prefix + "_terminated ***", value);
     438    b->CallPrintInt("*** " + prefix + "_terminated ***", mode);
    614439    #endif
    615440    b->setKernel(mKernel);
     441    return mode;
    616442}
    617443
     
    619445 * @brief updatePhiCountAfterTermination
    620446 ** ------------------------------------------------------------------------------------------------------------- */
    621 inline void PipelineCompiler::updatePhisAfterTermination(BuilderRef b) {
     447inline void PipelineCompiler::updatePhisAfterTermination(BuilderRef b, Value * const terminationMode) {
    622448    BasicBlock * const exitBlock = b->GetInsertBlock();
    623     assert (mTerminatedPhi);
    624     mTerminatedPhi->addIncoming(b->getTrue(), exitBlock);
    625     if (mHasProgressedPhi) {
    626         mHasProgressedPhi->addIncoming(b->getTrue(), exitBlock);
    627     }
     449    mTerminatedPhi->addIncoming(terminationMode, exitBlock);
     450    mHasProgressedPhi->addIncoming(b->getTrue(), exitBlock);
    628451    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    629452    for (unsigned i = 0; i < numOfInputs; ++i) {
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/cycle_counter_logic.hpp

    r6184 r6261  
    3838        Value* FP_100 = ConstantFP::get(b->getDoubleTy(), 100.0);
    3939        Value* totalCycles = b->getSize(0);
    40         for (const auto & kernel : mPipeline) {
     40        for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     41            const Kernel * const kernel = mPipeline[i];
    4142            b->setKernel(kernel);
    4243            Value * cycles = b->CreateLoad(b->getCycleCountPtr());
     
    4546        Value* fTotalCycle = b->CreateUIToFP(totalCycles, b->getDoubleTy());
    4647
    47         for (const auto & kernel : mPipeline) {
     48        for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     49            const Kernel * const kernel = mPipeline[i];
    4850            b->setKernel(kernel);
    4951            const auto & inputs = kernel->getInputStreamSetBindings();
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/kernel_logic.hpp

    r6258 r6261  
    1616 ** ------------------------------------------------------------------------------------------------------------- */
    1717inline void PipelineCompiler::setActiveKernel(BuilderRef b, const unsigned index) {
    18     assert (index < mPipeline.size());
     18    assert (index >= mFirstKernel && index < mLastKernel);
    1919    mKernelIndex = index;
    2020    mKernel = mPipeline[index];
     
    5858    const Binding & input = mKernel->getInputStreamSetBinding(inputPort);
    5959    const auto prefix = makeBufferName(mKernelIndex, input);
     60    Value * const hasEnough = b->CreateICmpUGE(accessible, requiredInput);
     61    Value * const hasTerminated = producerTerminated(b, inputPort);
     62    Value * const sufficientInput = b->CreateOr(hasEnough, hasTerminated);
    6063    #ifdef PRINT_DEBUG_MESSAGES
    6164    b->CallPrintInt(prefix + "_accessible", accessible);
    6265    b->CallPrintInt(prefix + "_requiredInput", requiredInput);
     66    b->CallPrintInt(prefix + "_sufficientInput", sufficientInput);
    6367    #endif
    64     Value * const hasEnough = b->CreateICmpUGE(accessible, requiredInput);
    65     Value * const hasTerminated = producerTerminated(inputPort);
    66     Value * const sufficientInput = b->CreateOr(hasEnough, hasTerminated);
    6768    mAccessibleInputItems[inputPort] = accessible;
    6869    BasicBlock * const target = b->CreateBasicBlock(prefix + "_hasInputData", mKernelLoopCall);
     
    7374 * @brief producerTerminated
    7475 ** ------------------------------------------------------------------------------------------------------------- */
    75 inline Value * PipelineCompiler::producerTerminated(const unsigned inputPort) const {
     76inline Value * PipelineCompiler::producerTerminated(BuilderRef b, const unsigned inputPort) const {
    7677    const auto bufferVertex = getInputBufferVertex(inputPort);
    7778    const auto producerVertex = parent(bufferVertex, mBufferGraph);
    78     return mTerminationGraph[producerVertex];
     79    return b->CreateICmpNE(mTerminationGraph[producerVertex], b->getSize(NotTerminated));
    7980}
    8081
     
    115116        const Binding & output = mKernel->getOutputStreamSetBinding(outputPort);
    116117        const auto prefix = makeBufferName(mKernelIndex, output);
     118        Value * const hasEnough = b->CreateICmpULE(strideLength, writable, prefix + "_hasEnough");
    117119        #ifdef PRINT_DEBUG_MESSAGES
    118120        b->CallPrintInt(prefix + "_writable", writable);
    119121        b->CallPrintInt(prefix + "_requiredOutput", strideLength);
     122        b->CallPrintInt(prefix + "_hasEnough", hasEnough);
    120123        #endif
    121         Value * const hasEnough = b->CreateICmpULE(strideLength, writable, prefix + "_hasEnough");
    122124        BasicBlock * const target = b->CreateBasicBlock(prefix + "_hasOutputSpace", mKernelLoopCall);
    123125        mWritableOutputItems[outputPort] = writable;
     
    136138    b->CreateLikelyCondBr(cond, target, mKernelLoopExit);
    137139    BasicBlock * const exitBlock = b->GetInsertBlock();
    138     mTerminatedPhi->addIncoming(b->getFalse(), exitBlock);
    139     if (mHasProgressedPhi) {
    140         mHasProgressedPhi->addIncoming(mAlreadyProgressedPhi, exitBlock);
    141     }
     140    mTerminatedPhi->addIncoming(b->getSize(NotTerminated), exitBlock);
     141    mHasProgressedPhi->addIncoming(mAlreadyProgressedPhi, exitBlock);
    142142    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    143143    for (unsigned i = 0; i < numOfInputs; ++i) {
     
    217217    #endif
    218218    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    219         Value * const term = producerTerminated(inputPort);
     219        Value * const term = producerTerminated(b, inputPort);
    220220        Value * const work = b->CreateIsNotNull(numOfStrides);
    221221        Value * const progress = b->CreateOr(work, term);
     
    524524    #endif
    525525
    526     mTerminationExplicitly = b->CreateCall(getDoSegmentFunction(b), args);
     526    mTerminatedExplicitly = b->CreateCall(getDoSegmentFunction(b), args);
    527527    if (LLVM_LIKELY(!canTerminate)) {
    528         mTerminationExplicitly = b->getFalse();
     528        mTerminatedExplicitly = b->getFalse();
    529529    }
    530530
     
    670670            processed = mUpdatedProcessedPhi[i];
    671671        }
    672         processed = truncateBlockSize(b, input, processed, mTerminatedPhi);
     672        processed = truncateBlockSize(b, input, processed);
    673673        mFullyProcessedItemCount[i] = processed;
    674674    }
     
    688688    for (unsigned i = 0; i < numOfOutputs; ++i) {
    689689        const Binding & output = mKernel->getOutputStreamSetBinding(i);
    690         Value * produced = truncateBlockSize(b, output, mUpdatedProducedPhi[i], mTerminatedPhi);
     690        Value * produced = truncateBlockSize(b, output, mUpdatedProducedPhi[i]);
    691691        mFullyProducedItemCount[i]->addIncoming(produced, mKernelLoopExitPhiCatch);
    692692    }
     
    811811 * @brief maskBlockSize
    812812 ** ------------------------------------------------------------------------------------------------------------- */
    813 inline Value * PipelineCompiler::truncateBlockSize(BuilderRef b, const Binding & binding, Value * itemCount, Value * all) const {
     813Value * PipelineCompiler::truncateBlockSize(BuilderRef b, const Binding & binding, Value * itemCount) const {
    814814    // TODO: if we determine all of the inputs of a stream have a blocksize attribute, or the output has one,
    815815    // we can skip masking it on input
     
    823823        Constant * const BLOCK_WIDTH = b->getSize(b->getBitBlockWidth());
    824824        Value * const maskedItemCount = b->CreateAnd(itemCount, ConstantExpr::getNeg(BLOCK_WIDTH));
    825         itemCount = b->CreateSelect(all, itemCount, maskedItemCount);
     825        Value * const reportAll = b->CreateICmpNE(mTerminatedPhi, b->getSize(NotTerminated));
     826        itemCount = b->CreateSelect(reportAll, itemCount, maskedItemCount);
    826827    }
    827828    return itemCount;
     
    832833 ** ------------------------------------------------------------------------------------------------------------- */
    833834Value * PipelineCompiler::getFunctionFromKernelState(BuilderRef b, Type * const type, const std::string & suffix) const {
    834     const auto kn = makeKernelName(mKernelIndex);
     835    const auto prefix = makeKernelName(mKernelIndex);
    835836    b->setKernel(mPipelineKernel);
    836     Value * const funcPtr = b->getScalarField(kn + suffix);
     837    Value * const funcPtr = b->getScalarField(prefix + suffix);
    837838    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    838         b->CreateAssert(funcPtr, mKernel->getName() + "." + suffix + " is null");
     839        b->CreateAssert(funcPtr, prefix + ":" + suffix + " is null");
    839840    }
    840841    b->setKernel(mKernel);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_analysis.hpp

    r6258 r6261  
    66namespace kernel {
    77
    8 #warning TODO: support call bindings that produce output that are inputs of other call bindings or become scalar outputs of the pipeline
     8// TODO: support call bindings that produce output that are inputs of
     9// other call bindings or become scalar outputs of the pipeline
     10
     11// TODO: with a better model of stride rates, we could determine whether
     12// being unable to execute a kernel implies we won't be able to execute
     13// another and "skip" over the unnecessary kernels.
    914
    1015#if 1
     
    3641    using ScalarDependencyMap = RelationshipMap<ScalarDependencyGraph::vertex_descriptor>;
    3742
    38     void enumerateScalarProducerBindings(const unsigned producerVertex, const Bindings & bindings, ScalarDependencyGraph & G, ScalarDependencyMap & M) {
     43    void enumerateScalarProducerBindings(const unsigned producerVertex, const Bindings & bindings,
     44                                         ScalarDependencyGraph & G, ScalarDependencyMap & M) {
    3945        const auto n = bindings.size();
    4046        for (unsigned i = 0; i < n; ++i) {
    4147            const Relationship * const rel = getRelationship(bindings[i]);
    4248            assert (M.count(rel) == 0);
    43             Constant * const value = isa<ScalarConstant>(rel) ? cast<ScalarConstant>(rel)->value() : nullptr;
    44             const auto bufferVertex = add_vertex(value, G);
     49            const auto bufferVertex = add_vertex(nullptr, G);
    4550            add_edge(producerVertex, bufferVertex, i, G);
    4651            M.emplace(rel, bufferVertex);
     
    4853    }
    4954
    50     ScalarDependencyGraph::vertex_descriptor makeIfConstant(const Relationship * const rel, ScalarDependencyGraph & G, ScalarDependencyMap & M) {
     55    ScalarDependencyGraph::vertex_descriptor makeIfConstant(const Relationship * const rel,
     56                                                            ScalarDependencyGraph & G, ScalarDependencyMap & M) {
    5157        const auto f = M.find(rel);
    5258        if (LLVM_LIKELY(f != M.end())) {
     
    6268
    6369    template <typename Array>
    64     void enumerateScalarConsumerBindings(const unsigned consumerVertex, const Array & array, ScalarDependencyGraph & G, ScalarDependencyMap & M) {
     70    void enumerateScalarConsumerBindings(const unsigned consumerVertex, const Array & array,
     71                                         ScalarDependencyGraph & G, ScalarDependencyMap & M) {
    6572        const auto n = array.size();
    6673        for (unsigned i = 0; i < n; ++i) {
     
    7582/** ------------------------------------------------------------------------------------------------------------- *
    7683 * @brief makeScalarDependencyGraph
     84 *
     85 * producer -> buffer/scalar -> consumer
    7786 ** ------------------------------------------------------------------------------------------------------------- */
    7887ScalarDependencyGraph PipelineCompiler::makeScalarDependencyGraph() const {
    7988
    80     const auto numOfKernels = mPipeline.size();
    81     const auto & callBindings = mPipelineKernel->getCallBindings();
    82     const auto numOfCallBindings = callBindings.size();
    83     const auto initialSize = numOfKernels + numOfCallBindings + 1;
     89    const auto pipelineInput = 0;
     90    const auto pipelineOutput = mLastKernel;
     91    const auto & call = mPipelineKernel->getCallBindings();
     92    const auto numOfCalls = call.size();
     93    const auto firstCall = mLastKernel + 1;
     94    const auto initialSize = firstCall + numOfCalls;
    8495
    8596    ScalarDependencyGraph G(initialSize);
    8697    ScalarDependencyMap M;
    8798
    88     enumerateScalarProducerBindings(numOfKernels, mPipelineKernel->getInputScalarBindings(), G, M);
     99    enumerateScalarProducerBindings(pipelineInput, mPipelineKernel->getInputScalarBindings(), G, M);
    89100    // verify each scalar input of the kernel is an input to the pipeline
    90     for (unsigned i = 0; i < numOfKernels; ++i) {
     101    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    91102        enumerateScalarConsumerBindings(i, mPipeline[i]->getInputScalarBindings(), G, M);
    92103    }
    93104    // enumerate the output scalars
    94     for (unsigned i = 0; i < numOfKernels; ++i) {
     105    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    95106        enumerateScalarProducerBindings(i, mPipeline[i]->getOutputScalarBindings(), G, M);
    96107    }
    97108    // enumerate the call bindings
    98     for (unsigned k = 0; k < numOfCallBindings; ++k) {
    99         const CallBinding & call = callBindings[k];
    100         enumerateScalarConsumerBindings(numOfKernels + 1 + k, call.Args, G, M);
     109    for (unsigned i = 0; i < numOfCalls; ++i) {
     110        enumerateScalarConsumerBindings(firstCall + i, call[i].Args, G, M);
    101111    }
    102112    // enumerate the pipeline outputs
    103     enumerateScalarConsumerBindings(numOfKernels, mPipelineKernel->getOutputScalarBindings(), G, M);
     113    enumerateScalarConsumerBindings(pipelineOutput, mPipelineKernel->getOutputScalarBindings(), G, M);
     114
    104115    return G;
    105116}
     
    117128        if (d != 0) {
    118129            flat_set<Vertex> V;
    119             V.reserve(num_vertices(G) - 1);
     130            V.reserve(num_vertices(G) - 2);
    120131            std::queue<Vertex> Q;
    121             // do a BFS to find one a path from t to s
     132            // do a BFS to search for a t-s path
    122133            Q.push(t);
    123134            for (;;) {
     
    126137                for (auto e : make_iterator_range(out_edges(u, G))) {
    127138                    const auto v = target(e, G);
    128                     if (V.count(v) != 0) continue;
    129                     if (LLVM_UNLIKELY(v == s)) return false;
    130                     Q.push(v);
    131                     V.insert(v);
     139                    if (LLVM_UNLIKELY(v == s)) {
     140                        // we found a t-s path
     141                        return false;
     142                    }
     143                    assert ("G was not initially acyclic!" && v != s);
     144                    if (LLVM_LIKELY(V.insert(v).second)) {
     145                        Q.push(v);
     146                    }
    132147                }
    133148                if (Q.empty()) {
     
    149164std::vector<unsigned> PipelineCompiler::lexicalOrderingOfStreamIO() const {
    150165
     166    using Graph = adjacency_list<hash_setS, vecS, bidirectionalS>;
     167
     168    const auto pipelineInput = 0;
     169    const auto pipelineOutput = mLastKernel;
     170
    151171    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    152172    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
     173    const auto firstOutput = numOfInputs;
    153174    const auto numOfPorts = numOfInputs + numOfOutputs;
    154 
    155     using Graph = adjacency_list<vecS, vecS, bidirectionalS>;
    156175
    157176    Graph G(numOfPorts);
     
    178197        }
    179198    }
    180 
     199    // check any pipeline input first
     200    if (out_degree(pipelineInput, mBufferGraph)) {
     201        for (unsigned i = 0; i < numOfInputs; ++i) {
     202            const auto buffer = getInputBufferVertex(i);
     203            if (LLVM_UNLIKELY(parent(buffer, mBufferGraph) == pipelineInput)) {
     204                for (unsigned j = 0; j < i; ++j) {
     205                    add_edge_if_no_induced_cycle(i, j, G);
     206                }
     207                for (unsigned j = i + 1; j < numOfPorts; ++j) {
     208                    add_edge_if_no_induced_cycle(i, j, G);
     209                }
     210            }
     211        }
     212    }
     213
     214    // ... and check any pipeline output first
     215    if (out_degree(pipelineInput, mBufferGraph)) {
     216        for (unsigned i = 0; i < numOfOutputs; ++i) {
     217            const auto buffer = getOutputBufferVertex(i);
     218            if (LLVM_UNLIKELY(has_child(buffer, pipelineOutput, mBufferGraph))) {
     219                const auto k = firstOutput + i;
     220                for (unsigned j = 0; j < k; ++j) {
     221                    add_edge_if_no_induced_cycle(k, j, G);
     222                }
     223                for (unsigned j = k + 1; j < numOfPorts; ++j) {
     224                    add_edge_if_no_induced_cycle(k, j, G);
     225                }
     226            }
     227        }
     228    }
    181229    // check any dynamic buffer last
    182230    std::vector<unsigned> D;
     
    187235        }
    188236    }
     237
    189238    for (const auto i : D) {
    190239        for (unsigned j = 0; j < numOfInputs; ++j) {
    191             add_edge_if_no_induced_cycle(j, numOfInputs + i, G);
     240            add_edge_if_no_induced_cycle(j, firstOutput + i, G);
    192241        }
    193242        auto Dj = D.begin();
     
    196245                ++Dj;
    197246            } else {
    198                 add_edge_if_no_induced_cycle(numOfInputs + j, numOfInputs + i, G);
     247                add_edge_if_no_induced_cycle(firstOutput + j, firstOutput + i, G);
    199248            }
    200249        }
     
    207256    return lexicalOrdering(std::move(G), mKernel->getName() + " has cyclic port dependencies.");
    208257}
    209 
    210 
    211 namespace {
    212 
    213     using TerminationMap = RelationshipMap<TerminationGraph::vertex_descriptor>;
    214 
    215     void enumerateTerminationProducerBindings(const unsigned producerVertex, const Bindings & bindings, TerminationGraph & G, TerminationMap & M) {
    216         const auto n = bindings.size();
    217         for (unsigned i = 0; i < n; ++i) {
    218             const Relationship * const rel = getRelationship(bindings[i]);
    219             if (LLVM_UNLIKELY(isa<ScalarConstant>(rel))) continue;
    220             assert (M.count(rel) == 0);
    221             const auto bufferVertex = add_vertex(G);
    222             add_edge(producerVertex, bufferVertex, G); // producer -> buffer ordering
    223             M.emplace(rel, bufferVertex);
    224         }
    225     }
    226 
    227     template <typename Array>
    228     void enumerateTerminationConsumerBindings(const unsigned consumerVertex, const Array & array, TerminationGraph & G, TerminationMap & M) {
    229         const auto n = array.size();
    230         for (unsigned i = 0; i < n; ++i) {
    231             const Relationship * const rel = getRelationship(array[i]);
    232             if (LLVM_UNLIKELY(isa<ScalarConstant>(rel))) continue;
    233             const auto f = M.find(rel);
    234             const auto bufferVertex = f->second;
    235             add_edge(bufferVertex, consumerVertex, G); // buffer -> consumer ordering
    236         }
    237     }
    238 
    239 } // end of anonymous namespace
    240258
    241259namespace {
     
    274292ConsumerGraph PipelineCompiler::makeConsumerGraph()  const {
    275293
     294    const auto firstBuffer = mLastKernel + 1;
    276295    const auto lastBuffer = num_vertices(mBufferGraph);
    277296    ConsumerGraph G(lastBuffer);
    278     const auto numOfKernels = mPipeline.size();
    279     const auto firstBuffer = numOfKernels + 1;
    280297
    281298#if 0
     
    396413}
    397414
    398 
    399415/** ------------------------------------------------------------------------------------------------------------- *
    400416 * @brief makeTerminationGraph
     
    404420 ** ------------------------------------------------------------------------------------------------------------- */
    405421TerminationGraph PipelineCompiler::makeTerminationGraph() const {
     422
     423    // A pipeline will end for one or two reasons:
     424
     425    // 1) no progress can be made by any kernel.
     426
     427    // 2) all pipeline sinks have terminated (i.e., any kernel that writes
     428    // to a pipeline output, is marked as having a side-effect, or produces
     429    // an input for some call).
     430
    406431    using VertexVector = std::vector<TerminationGraph::vertex_descriptor>;
    407432
    408     const auto numOfKernels = mPipeline.size();
    409     TerminationGraph G(numOfKernels + 1);
    410     TerminationMap M;
    411 
    412     // make an edge from the pipeline input to a buffer vertex
    413     enumerateTerminationProducerBindings(numOfKernels, mPipelineKernel->getInputScalarBindings(), G, M);
    414     enumerateTerminationProducerBindings(numOfKernels, mPipelineKernel->getInputStreamSetBindings(), G, M);
    415     G[numOfKernels] = nullptr;
    416 
    417     // make an edge from each producing kernel to a buffer vertex
    418     for (unsigned i = 0; i < numOfKernels; ++i) {
    419         const auto & producer = mPipeline[i];
    420         enumerateTerminationProducerBindings(i, producer->getOutputStreamSetBindings(), G, M);
    421         enumerateTerminationProducerBindings(i, producer->getOutputScalarBindings(), G, M);
    422         G[i] = nullptr;
    423     }
    424 
    425     // make an edge from each buffer to its consuming kernel(s)
    426     for (unsigned i = 0; i < numOfKernels; ++i) {
    427         const auto & consumer = mPipeline[i];
    428         enumerateTerminationConsumerBindings(i, consumer->getInputScalarBindings(), G, M);
    429         enumerateTerminationConsumerBindings(i, consumer->getInputStreamSetBindings(), G, M);
    430         if (LLVM_UNLIKELY(consumer->hasAttribute(AttrId::SideEffecting))) {
    431             add_edge(i, numOfKernels, G);
    432         }
    433     }
    434 
    435     // make an edge from a buffer vertex to each pipeline output
    436     for (const CallBinding & call : mPipelineKernel->getCallBindings()) {
    437         enumerateTerminationConsumerBindings(numOfKernels, call.Args, G, M);
    438     }
    439 
    440     clear_out_edges(numOfKernels, G);
    441     enumerateTerminationConsumerBindings(numOfKernels, mPipelineKernel->getOutputStreamSetBindings(), G, M);
    442     enumerateTerminationConsumerBindings(numOfKernels, mPipelineKernel->getOutputScalarBindings(), G, M);
    443 
     433    const auto numOfCalls = mPipelineKernel->getCallBindings().size();
     434    const auto pipelineOutput = mLastKernel;
     435    const auto firstCall = pipelineOutput + 1;
     436    const auto lastCall = firstCall + numOfCalls;
     437
     438    TerminationGraph G(pipelineOutput + 1);
     439
     440    // copy and summarize producer -> consumer relations from the buffer graph
     441    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     442        for (auto buffer : make_iterator_range(out_edges(i, mBufferGraph))) {
     443            const auto bufferVertex = target(buffer, mBufferGraph);
     444            for (auto consumer : make_iterator_range(out_edges(bufferVertex, mBufferGraph))) {
     445                const auto j = target(consumer, mBufferGraph);
     446                add_edge(i, j, G);
     447            }
     448        }
     449    }
     450
     451    // copy and summarize any output scalars of the pipeline or any calls
     452    for (unsigned i = pipelineOutput; i < lastCall; ++i) {
     453        for (auto relationship : make_iterator_range(in_edges(i, mScalarDependencyGraph))) {
     454            const auto relationshipVertex = source(relationship, mScalarDependencyGraph);
     455            for (auto producer : make_iterator_range(in_edges(relationshipVertex, mScalarDependencyGraph))) {
     456                const auto j = source(producer, mScalarDependencyGraph);
     457                add_edge(j, pipelineOutput, G);
     458            }
     459        }
     460    }
     461
     462    // create a k_i -> P_out edge for every kernel with a side effect attribute
     463    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     464        if (LLVM_UNLIKELY(mPipeline[i]->hasAttribute(AttrId::SideEffecting))) {
     465            add_edge(i, pipelineOutput, G);
     466        }
     467    }
     468
     469    // generate a transitive closure
    444470    VertexVector ordering;
    445471    ordering.reserve(num_vertices(G));
    446472    topological_sort(G, std::back_inserter(ordering));
    447473
    448     // generate a transitive closure
    449474    for (unsigned u : ordering) {
    450475        for (auto e : make_iterator_range(in_edges(u, G))) {
     
    456481    }
    457482
    458     // delete all buffer edges
    459     const auto firstBuffer = numOfKernels + 1;
    460     const auto lastBuffer = num_vertices(G);
    461     for (auto i = firstBuffer; i < lastBuffer; ++i) {
    462         clear_vertex(i, G);
    463     }
    464 
    465483    // then take the transitive reduction
    466484    VertexVector sources;
    467     for (unsigned u = firstBuffer; u--; ) {
     485    for (unsigned u = pipelineOutput; u--; ) {
    468486        for (auto e : make_iterator_range(in_edges(u, G))) {
    469487            sources.push_back(source(e, G));
     
    490508 ** ------------------------------------------------------------------------------------------------------------- */
    491509PopCountGraph PipelineCompiler::makePopCountGraph() const {
    492     const auto numOfKernels = mPipeline.size();
    493510
    494511    using PopCountVertex = PopCountGraph::vertex_descriptor;
     
    557574    };
    558575
    559     for (unsigned i = 0; i < numOfKernels; ++i) {
     576    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    560577        const Kernel * const kernel = mPipeline[i];
    561578        const auto numOfInputs = kernel->getNumOfStreamInputs();
     
    565582            addPopCountDependency(kernel, i, j, input);
    566583        }
     584        const auto firstOutput = numOfInputs;
    567585        for (unsigned j = 0; j < numOfOutputs; ++j) {
    568586            const auto & output = kernel->getOutputStreamSetBinding(j);
    569             addPopCountDependency(kernel, i, j + numOfInputs, output);
     587            addPopCountDependency(kernel, i, firstOutput + j, output);
    570588        }
    571589        M.clear();
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_builder.cpp

    r6253 r6261  
    284284
    285285    const std::unique_ptr<kernel::KernelBuilder> & b = mDriver.getBuilder();
    286     Type * const addrPtrTy = b->getVoidPtrTy(); // is voidptrty sufficient?
     286    unsigned stride = 0;
     287    Type * const addrPtrTy = b->getVoidPtrTy();
    287288    for (auto i : ordering) {
    288289        if (LLVM_LIKELY(i < numOfKernels)) {
    289290            Kernel * const k = mKernels[i];
    290291            if (k->hasFamilyName()) {
    291                 const auto kn = PipelineKernel::makeKernelName(k, index[i]);
     292                const auto kn = PipelineKernel::makeKernelName(k, index[i] + 1);
    292293                addInputScalar(addrPtrTy, kn);
    293294                addInputScalar(addrPtrTy, kn + INITIALIZE_FUNCTION_POINTER_SUFFIX);
     
    296297            }
    297298            pipeline.emplace_back(k);
     299            assert (k->getStride());
     300            if (stride) {
     301                stride = boost::lcm(stride, k->getStride());
     302            } else {
     303                stride = k->getStride();
     304            }
    298305        }
    299306    }
    300307
    301308    PipelineKernel * const pk =
    302         new PipelineKernel(std::move(signature), mNumOfThreads,
     309        new PipelineKernel(b, std::move(signature), mNumOfThreads,
    303310                           std::move(pipeline), std::move(mCallBindings),
    304311                           std::move(mInputStreamSets), std::move(mOutputStreamSets),
    305312                           std::move(mInputScalars), std::move(mOutputScalars));
     313
     314    pk->setStride(stride);
    306315
    307316    return pk;
     
    343352    out.flush();
    344353
     354    // TODO: if the condition is also one of the normal inputs and the rate is compatible,
     355    // we could avoid sending it through.
     356
    345357    OptimizationBranch * const br =
    346             new OptimizationBranch(std::move(name),
     358            new OptimizationBranch(mDriver.getBuilder(), std::move(name),
    347359                                   mCondition, trueBranch, falseBranch,
    348360                                   std::move(mInputStreamSets), std::move(mOutputStreamSets),
    349361                                   std::move(mInputScalars), std::move(mOutputScalars));
    350362
     363    br->setStride(boost::lcm(trueBranch->getStride(), falseBranch->getStride()));
    351364
    352365    return br;
     
    464477}
    465478
     479template <typename IfType>
     480inline void addCondition(Relationship * const condition, Bindings & bindings) {
     481    if (isa<IfType>(condition)) {
     482        bindings.emplace_back(OptimizationBranch::CONDITION_TAG, condition, FixedRate(1));
     483    }
     484}
     485
    466486OptimizationBranchBuilder::OptimizationBranchBuilder(
    467487      BaseDriver & driver,
    468       StreamSet * const condition,
     488      Relationship * const condition,
    469489      Bindings && stream_inputs, Bindings && stream_outputs,
    470490      Bindings && scalar_inputs, Bindings && scalar_outputs)
     
    474494      std::move(scalar_inputs), std::move(scalar_outputs))
    475495, mCondition(condition)
    476 , mTrueBranch()
    477 , mFalseBranch() {
    478 
     496, mTrueBranch(nullptr)
     497, mFalseBranch(nullptr) {
     498    addCondition<StreamSet>(condition, mInputStreamSets);
     499    addCondition<Scalar>(condition, mInputScalars);
    479500}
    480501
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6258 r6261  
    4949// BOOST_STRONG_TYPEDEF(unsigned, PortNumber)
    5050
    51 #warning TODO: these graphs are similar; look into streamlining their generation.
    52 
    53 // TODO: split pipeline vertex into input and output vertices to keep all graphs DAGs
    54 // without having to delete edges.
    55 
    5651enum class BufferType : unsigned {
    5752    Internal = 0
     
    6358    Value *             TotalItems;
    6459
    65 
    66     kernel::Kernel *    Kernel;
    6760    StreamSetBuffer *   Buffer;
    6861
     
    7568    BufferType          Type;
    7669
    77     BufferNode() : TotalItems(nullptr), Kernel(nullptr), Buffer(nullptr), Lower(), Upper(), Overflow(0), Fasimile(0), Type(BufferType::Internal) {}
     70    BufferNode() : TotalItems(nullptr), Buffer(nullptr), Lower(), Upper(), Overflow(0), Fasimile(0), Type(BufferType::Internal) {}
    7871};
    7972
     
    161154using PopCountGraph = adjacency_list<vecS, vecS, bidirectionalS, no_property, PopCountEdge>;
    162155
     156enum TerminationMode : unsigned {
     157    NotTerminated = 0
     158    , TerminatedNormally = 1
     159    , TerminatedExplicitly = 2
     160};
     161
    163162const static std::string LOGICAL_SEGMENT_SUFFIX = ".LSN";
    164163const static std::string TERMINATION_SIGNAL_SUFFIX = ".TERM";
     
    184183
    185184    void addInternalKernelProperties(BuilderRef b, const unsigned kernelIndex);
     185    void acquireCurrentSegment(BuilderRef b);
     186    void releaseCurrentSegment(BuilderRef b);
    186187
    187188// main pipeline functions
     
    189190    void start(BuilderRef b, Value * const initialSegNo);
    190191    void setActiveKernel(BuilderRef b, const unsigned index);
    191     void acquireCurrentSegment(BuilderRef b);
    192192    void executeKernel(BuilderRef b);
    193193    void end(BuilderRef b, const unsigned step);
     
    235235    void computeFullyProducedItemCounts(BuilderRef b);
    236236
    237     void updatePhisAfterTermination(BuilderRef b);
     237    void updatePhisAfterTermination(BuilderRef b, Value * const terminationMode);
    238238
    239239    void zeroFillPartiallyWrittenOutputStreams(BuilderRef b);
     
    242242    void writeFinalConsumedItemCounts(BuilderRef b);
    243243    void readFinalProducedItemCounts(BuilderRef b);
    244     void releaseCurrentSegment(BuilderRef b);
     244
    245245    void writeCopyToOverflowLogic(BuilderRef b);
    246246    void checkForSufficientInputData(BuilderRef b, const unsigned inputPort);
     
    268268    Value * subtractLookahead(BuilderRef b, const unsigned inputPort, Value * itemCount) const;
    269269    Constant * getLookahead(BuilderRef b, const unsigned inputPort) const;
    270     Value * truncateBlockSize(BuilderRef b, const Binding & binding, Value * itemCount, Value * all) const;
     270    Value * truncateBlockSize(BuilderRef b, const Binding & binding, Value * itemCount) const;
    271271    Value * getTotalItemCount(BuilderRef b, const unsigned inputPort) const;
    272     Value * producerTerminated(const unsigned inputPort) const;
    273     Value * initiallyTerminated(BuilderRef b) const;
    274     void setTerminated(BuilderRef b, Value * const terminated);
     272    Value * producerTerminated(BuilderRef b, const unsigned inputPort) const;
     273    Value * initiallyTerminated(BuilderRef b);
     274    Value * setTerminated(BuilderRef b, Value * const condition, const TerminationMode trueMode, const TerminationMode falseMode) const;
    275275    void resetMemoizedFields();
    276276
     
    341341    void enumerateBufferProducerBindings(const unsigned producer, const Bindings & bindings, BufferGraph & G, BufferMap & M);
    342342    void enumerateBufferConsumerBindings(const unsigned consumer, const Bindings & bindings, BufferGraph & G, BufferMap & M);
    343     BufferRateData getBufferRateData(const unsigned index, const unsigned port, bool input);
     343    BufferRateData getBufferRateData(const Kernel * const kernel, const Binding &binding, const unsigned port) const;
    344344
    345345    void constructBuffers(BuilderRef b);
     
    381381    StreamSetBuffer * getOutputBuffer(const unsigned outputPort) const;
    382382
     383    LLVM_READNONE bool nestedPipeline() const {
     384        return out_degree(0, mBufferGraph) != 0 || in_degree(mLastKernel, mBufferGraph) != 0;
     385    }
     386
    383387    static LLVM_READNONE const Binding & getBinding(const Kernel * kernel, const Port port, const unsigned i) {
    384388        if (port == Port::Input) {
     
    396400    LLVM_READNONE const Binding & getOutputBinding(const Kernel * const consumer, const unsigned index) const;
    397401
    398     void writeOutputScalars(BuilderRef b, const unsigned u, std::vector<Value *> & args);
     402    void writeOutputScalars(BuilderRef b, const unsigned index, std::vector<Value *> & args);
    399403
    400404    void verifyInputItemCount(BuilderRef b, Value * processed, const unsigned inputPort) const;
     
    410414
    411415    PipelineKernel * const                      mPipelineKernel;
    412     const Kernels &                             mPipeline;
     416    const Kernels                               mPipeline;
     417    const unsigned                              mFirstKernel;
     418    const unsigned                              mLastKernel;
     419
    413420
    414421    OwnedStreamSetBuffers                       mOwnedBuffers;
     
    417424
    418425    // pipeline state
    419     PHINode *                                   mTerminatedPhi = nullptr;
    420426    PHINode *                                   mSegNo = nullptr;
     427    PHINode *                                   mProgressCounter = nullptr;
     428    Value *                                     mPipelineProgress = nullptr;
     429    Value *                                     mPipelineTerminated = nullptr;
    421430    BasicBlock *                                mPipelineLoop = nullptr;
    422431    BasicBlock *                                mKernelEntry = nullptr;
     
    429438    BasicBlock *                                mKernelExit = nullptr;
    430439    BasicBlock *                                mPipelineEnd = nullptr;
    431     std::vector<Value *>                        mOutputScalars;
    432440
    433441    // kernel state
     442    Value *                                     mTerminatedInitially = nullptr;
     443    PHINode *                                   mHasProgressedPhi = nullptr;
     444    PHINode *                                   mAlreadyProgressedPhi = nullptr;
     445    PHINode *                                   mTerminatedPhi = nullptr;
    434446    Value *                                     mNumOfLinearStrides = nullptr;
    435     Value *                                     mTerminationExplicitly = nullptr;
    436 
     447    Value *                                     mTerminatedExplicitly = nullptr;
    437448    std::vector<unsigned>                       mPortOrdering;
    438449
     
    467478    // debug + misc state
    468479    Value *                                     mCycleCountStart = nullptr;
    469     PHINode *                                   mDeadLockCounter = nullptr;
    470     Value *                                     mPipelineProgress = nullptr;
    471     PHINode *                                   mHasProgressedPhi = nullptr;
    472     PHINode *                                   mAlreadyProgressedPhi = nullptr;
    473480
    474481    // popcount state
     
    486493};
    487494
     495Kernels makePipelineList(PipelineKernel * const pk) {
     496    const Kernels & P = pk->getKernels();
     497    const auto n = P.size();
     498    Kernels L(n + 2);
     499    L[0] = pk;
     500    for (unsigned i = 0; i != n; ++i) {
     501        L[i + 1] = P[i];
     502    }
     503    L[n + 1] = pk;
     504    return L;
     505}
     506
    488507/** ------------------------------------------------------------------------------------------------------------- *
    489508 * @brief constructor
     
    491510inline PipelineCompiler::PipelineCompiler(BuilderRef b, PipelineKernel * const pipelineKernel)
    492511: mPipelineKernel(pipelineKernel)
    493 , mPipeline(pipelineKernel->mKernels)
     512, mPipeline(makePipelineList(pipelineKernel))
     513, mFirstKernel(1)
     514, mLastKernel(mPipeline.size() - 1)
    494515, mBufferGraph(makeBufferGraph(b))
    495516, mConsumerGraph(makeConsumerGraph())
     
    497518, mTerminationGraph(makeTerminationGraph())
    498519, mPopCountGraph(makePopCountGraph()) {
    499 
    500 
     520    initializePopCounts();
    501521}
    502522
     
    517537        }
    518538    }
     539    assert (!"input buffer not found");
    519540    llvm_unreachable("input buffer not found");
    520541}
     
    543564        }
    544565    }
     566    assert (!"output buffer not found");
    545567    llvm_unreachable("output buffer not found");
    546568}
     
    654676}
    655677
     // Returns true if graph G contains at least one edge from vertex u to vertex v and
     // false otherwise. Only the out-edges of u are inspected, one at a time, and the scan
     // stops at the first edge whose target equals v.
     678template <typename Graph>
     679inline bool has_child(const typename graph_traits<Graph>::vertex_descriptor u,
     680                      const typename graph_traits<Graph>::vertex_descriptor v,
     681                      const Graph & G) {
     682    for (const auto & e : make_iterator_range(out_edges(u, G))) {
     683        if (target(e, G) == v) {
     684            return true;
     685        }
     686    }
     687    return false;
     688}
     689
     690
     691
     692
     693
    656694} // end of namespace
    657695
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_kernel.cpp

    r6253 r6261  
    322322 * @brief constructor
    323323 ** ------------------------------------------------------------------------------------------------------------- */
    324 PipelineKernel::PipelineKernel(std::string && signature, const unsigned numOfThreads,
     324PipelineKernel::PipelineKernel(const std::unique_ptr<KernelBuilder> & b,
     325                               std::string && signature, const unsigned numOfThreads,
    325326                               Kernels && kernels, CallBindings && callBindings,
    326327                               Bindings && stream_inputs, Bindings && stream_outputs,
    327                                Bindings &&scalar_inputs, Bindings && scalar_outputs)
    328 : Kernel(TypeId::Pipeline,
     328                               Bindings && scalar_inputs, Bindings && scalar_outputs)
     329: Kernel(b, TypeId::Pipeline,
    329330         "p" + std::to_string(numOfThreads) + "_" + getStringHash(signature),
    330331         std::move(stream_inputs), std::move(stream_outputs),
     
    334335, mCallBindings(std::move(callBindings))
    335336, mSignature(std::move(signature)) {
    336 
     337    addAttributesFrom(mKernels);
    337338}
    338339
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6258 r6261  
    77
    88// NOTE: the following is a workaround for an LLVM bug for 32-bit VMs on 64-bit architectures.
    9 // When calculating the address of a local stack allocated object, the size of a pointer will
    10 // be 32-bits but when performing a GEP on the same pointer as the result of a "malloc" or
    11 // when passed as a function parameter, the size will be 64-bits. More investigation should be
    12 // done to determine which versions of LLVM are affected by this bug.
    13 
    14 inline LLVM_READNONE bool useMalloc(BuilderRef b) {
     9// When calculating the address of a local stack allocated object, the size of a pointer is
     10// 32-bits but when performing the same GEP on a pointer returned by "malloc" or passed as a
     11// function argument, the size is 64-bits. More investigation is needed to determine which
     12// versions of LLVM are affected by this bug.
     13
     14inline LLVM_READNONE bool allocateOnHeap(const BuilderRef b) {
    1515    DataLayout DL(b->getModule());
    1616    return (DL.getPointerSizeInBits() != b->getSizeTy()->getBitWidth());
     
    1919inline Value * makeStateObject(BuilderRef b, Type * type) {
    2020    Value * ptr = nullptr;
    21     if (LLVM_UNLIKELY(useMalloc(b))) {
     21    if (LLVM_UNLIKELY(allocateOnHeap(b))) {
    2222        ptr = b->CreateCacheAlignedMalloc(type);
    2323    } else {
     
    2929
    3030inline void destroyStateObject(BuilderRef b, Value * ptr) {
    31     if (LLVM_UNLIKELY(useMalloc(b))) {
     31    if (LLVM_UNLIKELY(allocateOnHeap(b))) {
    3232        b->CreateFree(ptr);
     33    }
     34}
     35
     36/** ------------------------------------------------------------------------------------------------------------- *
     37 * @brief addPipelineKernelProperties
        *
        * Makes the pipeline kernel the builder's current kernel and then, for every kernel
        * index i in [mFirstKernel, mLastKernel), invokes in this order:
        * addBufferHandlesToPipelineKernel, addInternalKernelProperties,
        * addConsumerKernelProperties and addPopCountScalarsToPipelineKernel.
        * The builder's current kernel is set to the pipeline kernel again before returning.
     38 ** ------------------------------------------------------------------------------------------------------------- */
     39inline void PipelineCompiler::addPipelineKernelProperties(BuilderRef b) {
     40    b->setKernel(mPipelineKernel);
     41    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     42        addBufferHandlesToPipelineKernel(b, i);
     43        addInternalKernelProperties(b, i);
     44        addConsumerKernelProperties(b, i);
     45        addPopCountScalarsToPipelineKernel(b, i);
     46    }
            // NOTE(review): presumably one of the helpers above may change the builder's current
            // kernel, hence the second setKernel call -- confirm against the helper definitions.
     47    b->setKernel(mPipelineKernel);
     48}
     49
     50/** ------------------------------------------------------------------------------------------------------------- *
     51 * @brief addInternalKernelProperties
        *
        * Registers, on the pipeline kernel, the size-typed internal scalars associated with the
        * kernel at position kernelIndex of mPipeline. In registration order these are:
        *   - makeKernelName(kernelIndex) + TERMINATION_SIGNAL_SUFFIX
        *   - makeKernelName(kernelIndex) + LOGICAL_SEGMENT_SUFFIX
        *   - for each input stream set binding: makeBufferName(kernelIndex, input) +
        *     DEFERRED_ITEM_COUNT_SUFFIX (only when input.isDeferred()), followed by
        *     makeBufferName(kernelIndex, input) + ITEM_COUNT_SUFFIX
        *   - for each output stream set binding: makeBufferName(kernelIndex, output) +
        *     ITEM_COUNT_SUFFIX
        *
        * @param b            builder; only used here to obtain the size type
        * @param kernelIndex  index into mPipeline of the kernel whose scalars are added
     52 ** ------------------------------------------------------------------------------------------------------------- */
     53inline void PipelineCompiler::addInternalKernelProperties(BuilderRef b, const unsigned kernelIndex) {
     54
     55    IntegerType * const sizeTy = b->getSizeTy();
     56
     57    const auto name = makeKernelName(kernelIndex);
     58    // TODO: prove two termination signals can be fused into a single counter?
     59    mPipelineKernel->addInternalScalar(sizeTy, name + TERMINATION_SIGNAL_SUFFIX);
     60    mPipelineKernel->addInternalScalar(sizeTy, name + LOGICAL_SEGMENT_SUFFIX);
     61
     62    // TODO: non deferred item count for fixed rates could be calculated from total # of segments.
     63    const Kernel * const kernel = mPipeline[kernelIndex];
     64    const auto numOfInputs = kernel->getNumOfStreamInputs();
     65    for (unsigned i = 0; i < numOfInputs; i++) {
     66        const Binding & input = kernel->getInputStreamSetBinding(i);
     67        const auto prefix = makeBufferName(kernelIndex, input);
                // a deferred input gets an additional scalar, registered before its item count
     68        if (input.isDeferred()) {
     69            mPipelineKernel->addInternalScalar(sizeTy, prefix + DEFERRED_ITEM_COUNT_SUFFIX);
     70        }
     71        mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
     72    }
     73
     74    const auto numOfOutputs = kernel->getNumOfStreamOutputs();
     75    for (unsigned i = 0; i < numOfOutputs; i++) {
     76        const Binding & output = kernel->getOutputStreamSetBinding(i);
     77        const auto prefix = makeBufferName(kernelIndex, output);
     78        mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
     79    }
     80
     81}
     82
     83/** ------------------------------------------------------------------------------------------------------------- *
     84 * @brief generateInitializeMethod
        *
        * Emits the initialization logic for the kernels at indices [mFirstKernel, mLastKernel):
        *   1. adds each kernel's declarations;
        *   2. for every kernel without a family name, creates an instance and stores its handle
        *      in the scalar field named makeKernelName(i);
        *   3. constructs the buffers (constructBuffers);
        *   4. for every kernel, assembles the argument list (the kernel handle followed by the
        *      pipeline input scalars selected through mScalarDependencyGraph), calls the
        *      function returned by getInitializationFunction and, when the kernel
        *      canSetTerminateSignal(), passes the call's result to setTerminated.
     85 ** ------------------------------------------------------------------------------------------------------------- */
     86void PipelineCompiler::generateInitializeMethod(BuilderRef b) {
     87    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     88        mPipeline[i]->addKernelDeclarations(b);
     89    }
     90    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     91        Kernel * const kernel = mPipeline[i];
     92        if (!kernel->hasFamilyName()) {
     93            Value * const handle = kernel->createInstance(b);
     94            b->setScalarField(makeKernelName(i), handle);
     95        }
     96    }
     97    constructBuffers(b);
     98    std::vector<Value *> args;
     99    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
                 // NOTE(review): setActiveKernel presumably updates mKernel to mPipeline[i] -- confirm.
     100        setActiveKernel(b, i);
                 // one slot for the kernel handle plus one per incoming scalar edge
     101        args.resize(in_degree(i, mScalarDependencyGraph) + 1);
                 // debug builds clear the slots so the assert below can detect a slot that is
                 // written twice
     102        #ifndef NDEBUG
     103        std::fill(args.begin(), args.end(), nullptr);
     104        #endif
     105        args[0] = mKernel->getHandle();
                 // the scalar fields read below belong to the pipeline kernel
     106        b->setKernel(mPipelineKernel);
     107        for (const auto ce : make_iterator_range(in_edges(i, mScalarDependencyGraph))) {
                     // the edge property is the argument position, offset by one for the handle
     108            const auto j = mScalarDependencyGraph[ce] + 1;
                     // NOTE(review): in_edge is defined elsewhere; presumably it returns the single
                     // in-edge of the scalar vertex -- confirm. Its edge property is used as the index
                     // of the pipeline kernel's input scalar binding.
     109            const auto pe = in_edge(source(ce, mScalarDependencyGraph), mScalarDependencyGraph);
     110            const auto k = mScalarDependencyGraph[pe];
     111            const Binding & input = mPipelineKernel->getInputScalarBinding(k);
     112            assert (args[j] == nullptr);
     113            args[j] = b->getScalarField(input.getName());
     114        }
                 // switch the builder back to the kernel being initialized before emitting the call
     115        b->setKernel(mKernel);
     116        Value * const terminatedOnInit = b->CreateCall(getInitializationFunction(b), args);
     117        if (mKernel->canSetTerminateSignal()) {
     118            setTerminated(b, terminatedOnInit, TerminatedExplicitly, NotTerminated);
     119        }
    33120    }
    34121}
     
    45132    setThreadLocalState(b, localState);
    46133    start(b, b->getSize(0));
    47     for (unsigned i = 0; i < mPipeline.size(); ++i) {
     134    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    48135        setActiveKernel(b, i);
    49136        executeKernel(b);
     
    68155
    69156    Module * const m = b->getModule();
    70     IntegerType * const sizeTy = b->getSizeTy();
    71157    PointerType * const voidPtrTy = b->getVoidPtrTy();
    72158    ConstantInt * const ZERO = b->getInt32(0);
     
    87173    // (n - 1) threads to handle the subsequent offsets
    88174    const unsigned threads = numOfThreads - 1;
    89     Type * const pthreadsTy = ArrayType::get(sizeTy, threads);
     175    Type * const pthreadTy = TypeBuilder<pthread_t, false>::get(b->getContext());
     176    Type * const pthreadsTy = ArrayType::get(pthreadTy, threads);
    90177    AllocaInst * const pthreads = b->CreateCacheAlignedAlloca(pthreadsTy);
    91178    std::vector<Value *> threadIdPtr(threads);
     
    121208    // generate the pipeline logic for this thread
    122209    start(b, segmentOffset);
    123     for (unsigned i = 0; i < mPipeline.size(); ++i) {
     210    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    124211        setActiveKernel(b, i);
    125212        acquireCurrentSegment(b);
     
    146233}
    147234
     235/** ------------------------------------------------------------------------------------------------------------- *
     236 * @brief generateFinalizeMethod
         *
         * Calls printOptionalCycleCounter and then, for every kernel index i in
         * [mFirstKernel, mLastKernel), makes that kernel active, loads its buffer handles and
         * emits a call to the function returned by getFinalizeFunction with the kernel's handle.
         * The value of each call is stored as the property of vertex i in mScalarDependencyGraph;
         * writeOutputScalars reads that vertex property when resolving a scalar produced by a
         * kernel. Finally releaseBuffers is called.
     237 ** ------------------------------------------------------------------------------------------------------------- */
     238void PipelineCompiler::generateFinalizeMethod(BuilderRef b) {
     239    printOptionalCycleCounter(b);
     240    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     241        setActiveKernel(b, i);
     242        loadBufferHandles(b);
                 // keep the finalize call's result so output scalars can be extracted from it later
     243        mScalarDependencyGraph[i] = b->CreateCall(getFinalizeFunction(b), mKernel->getHandle());
     244    }
     245    releaseBuffers(b);
     246}
     247
     248/** ------------------------------------------------------------------------------------------------------------- *
     249 * @brief getFinalOutputScalars
         *
         * For each call binding k of the pipeline kernel, gathers the argument values for
         * vertex (mLastKernel + 1 + k) of mScalarDependencyGraph via writeOutputScalars,
         * converts each argument to the corresponding parameter type of the callee with
         * CreateZExtOrTrunc and emits the call. Afterwards gathers the values for vertex
         * mLastKernel (named pipelineOutput here) and returns them.
         *
         * @return the values written by writeOutputScalars for vertex mLastKernel
     250 ** ------------------------------------------------------------------------------------------------------------- */
     251std::vector<Value *> PipelineCompiler::getFinalOutputScalars(BuilderRef b) {
     252
     253    const auto & calls = mPipelineKernel->getCallBindings();
     254    const auto numOfCalls = calls.size();
     255    std::vector<Value *> args;
     256    b->setKernel(mPipelineKernel);
     257    const auto pipelineOutput = mLastKernel;
     258    const auto firstCall = pipelineOutput + 1;
     259    for (unsigned k = 0; k < numOfCalls; ++k) {
     260        writeOutputScalars(b, firstCall + k, args);
     261        Function * const f = cast<Function>(calls[k].Callee);
     262        auto i = f->arg_begin();
                 // walk the callee's formal parameters in step with the gathered arguments
     263        for (auto j = args.begin(); j != args.end(); ++i, ++j) {
     264            assert (i != f->arg_end());
     265            *j = b->CreateZExtOrTrunc(*j, i->getType());
     266        }
                 // the number of gathered arguments must equal the callee's parameter count
     267        assert (i == f->arg_end());
     268        b->CreateCall(f, args);
     269    }
     270    writeOutputScalars(b, pipelineOutput, args);
     271    return args;
     272}
     273
     274/** ------------------------------------------------------------------------------------------------------------- *
     275 * @brief writeOutputScalars
         *
         * Fills args with one value per in-edge of vertex index in mScalarDependencyGraph; the
         * property of each in-edge selects the position within args. The value of a scalar
         * vertex is stored as that vertex's property the first time it is resolved and reused
         * afterwards. A scalar whose producer is vertex 0 (pipelineInput) is taken from the
         * pipeline kernel's input scalar binding: the constant's value when the relationship is
         * a ScalarConstant, otherwise the scalar field of that name. Any other scalar is taken
         * from the producer vertex's stored value, using CreateExtractValue when that value has
         * an aggregate type.
         *
         * @param b      builder used to read scalar fields and emit extractvalue instructions
         * @param index  vertex of mScalarDependencyGraph whose incoming scalars are gathered
         * @param args   output; resized to the in-degree of index and then filled
     276 ** ------------------------------------------------------------------------------------------------------------- */
     277void PipelineCompiler::writeOutputScalars(BuilderRef b, const unsigned index, std::vector<Value *> & args) {
     278    const auto n = in_degree(index, mScalarDependencyGraph);
     279    args.resize(n);
     280    const auto pipelineInput = 0;
     281    for (const auto e : make_iterator_range(in_edges(index, mScalarDependencyGraph))) {
     282        const auto scalar = source(e, mScalarDependencyGraph);
     283        // If we have not already retrieved the specific scalar, construct/load/extract it.
     284        if (LLVM_LIKELY(mScalarDependencyGraph[scalar] == nullptr)) {
                     // NOTE(review): in_edge is defined elsewhere; presumably it returns the single
                     // in-edge of the scalar vertex -- confirm. i is the producing vertex and j the
                     // property of the producing edge.
     285            const auto producer = in_edge(scalar, mScalarDependencyGraph);
     286            const auto i = source(producer, mScalarDependencyGraph);
     287            const auto j = mScalarDependencyGraph[producer];
     288            Value * value = nullptr;
     289            if (LLVM_UNLIKELY(i == pipelineInput)) {
     290                const Binding & input = mPipelineKernel->getInputScalarBinding(j);
     291                const Relationship * const rel = getRelationship(input);
     292                if (isa<ScalarConstant>(rel)) {
     293                    value = cast<ScalarConstant>(rel)->value();
     294                } else {
     295                    value = b->getScalarField(input.getName());
     296                }
     297            } else { // output scalar of some kernel
                         // the producer vertex's property must already be set (generateFinalizeMethod
                         // stores the finalize call's result there)
     298                Value * const outputScalars = mScalarDependencyGraph[i]; assert (outputScalars);
     299                if (outputScalars->getType()->isAggregateType()) {
     300                    value = b->CreateExtractValue(outputScalars, {j});
     301                } else { assert (j == 0 && "scalar type is not an aggregate");
     302                    value = outputScalars;
     303                }
     304            }
                     // remember the resolved value so later edges from this scalar reuse it
     305            mScalarDependencyGraph[scalar] = value;
     306        }
     307        const auto k = mScalarDependencyGraph[e];
     308        args[k] = mScalarDependencyGraph[scalar];
     309    }
     310}
    148311
    149312/** ------------------------------------------------------------------------------------------------------------- *
     
    158321    const auto prefix = makeKernelName(mKernelIndex);
    159322    const auto serialize = codegen::DebugOptionIsSet(codegen::SerializeThreads);
    160     const unsigned waitingOnIdx = serialize ? (mPipeline.size() - 1) : mKernelIndex;
     323    const unsigned waitingOnIdx = serialize ? (mLastKernel - 1) : mKernelIndex;
    161324    const auto waitingOn = makeKernelName(waitingOnIdx);
    162325    Value * const waitingOnPtr = b->getScalarFieldPtr(waitingOn + LOGICAL_SEGMENT_SUFFIX);
     
    241404        auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
    242405        Value * const handle = buffer->getHandle();
    243 
    244406        indices[1] = b->getInt32(i + FIRST_STREAM_INDEX);
    245 
    246407        b->CreateStore(handle, b->CreateGEP(threadState, indices));
    247408    }
     
    250411        auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
    251412        Value * const handle = buffer->getHandle();
    252 
    253413        indices[1] = b->getInt32(i + numOfInputs + FIRST_STREAM_INDEX);
    254 
    255414        b->CreateStore(handle, b->CreateGEP(threadState, indices));
    256415    }
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/popcount_logic.hpp

    r6258 r6261  
    5656        // If this is the producer's final stride, round the index position up
    5757        // to account for a partial stride.
    58         Value * const rounding = b->CreateSelect(mTerminatedPhi, BLOCK_SIZE_MINUS_1, ZERO);
     58        Value * const terminated = b->CreateICmpNE(mTerminatedPhi, b->getSize(NotTerminated));
     59        Value * const rounding = b->CreateSelect(terminated, BLOCK_SIZE_MINUS_1, ZERO);
    5960        Value * const endIndex = b->CreateLShr(b->CreateAdd(produced, rounding), LOG2_BLOCK_WIDTH);
    6061
     
    241242            Value * const total = getTotalItemCount(b, refPortNum);
    242243            Value * const strideLength = getInputStrideLength(b, refPortNum);
    243             Value * const term = producerTerminated(refPortNum);
     244            Value * const term = producerTerminated(b, refPortNum);
    244245            Value * const strideLengthMinus1 = b->CreateSub(strideLength, ONE);
    245246            Value * const padding = b->CreateSelect(term, strideLengthMinus1, b->getSize(0));
     
    574575    }
    575576
    576     const auto firstBuffer = mPipeline.size() + 1;
     577    const auto firstBuffer = mLastKernel + 1;
    577578    const auto lastBuffer = num_vertices(mBufferGraph);
    578579    assert (firstBuffer <= lastBuffer);
     
    629630        return;
    630631    }
    631     const auto firstBuffer = mPipeline.size() + 1;
     632    const auto firstBuffer = mLastKernel + 1;
    632633    const auto lastBuffer = num_vertices(mBufferGraph);
    633634    assert (firstBuffer <= lastBuffer);
     
    663664 ** ------------------------------------------------------------------------------------------------------------- */
    664665inline void PipelineCompiler::initializePopCounts() {
    665     const auto firstBuffer = mPipeline.size() + 1;
     666    const auto firstBuffer = mLastKernel + 1;
    666667    const auto lastBuffer = num_vertices(mBufferGraph);
    667668    assert (firstBuffer < lastBuffer);
     
    678679inline StructType * PipelineCompiler::getPopCountThreadLocalStateType(BuilderRef b) {
    679680
    680     const auto firstBuffer = mPipeline.size() + 1;
     681    const auto firstBuffer = mLastKernel + 1;
    681682    const auto lastBuffer = num_vertices(mBufferGraph);
    682683    assert (firstBuffer < lastBuffer);
     
    740741    for (const auto e : make_iterator_range(in_edges(bufferVertex, mPopCountGraph))) {
    741742        const auto refPort = mPopCountGraph[e].Port;
    742         const auto kernelIndex = parent(source(e, mPopCountGraph), mPopCountGraph);
    743         Kernel * const k = mPipeline[kernelIndex];
     743        const auto kernelVertex = parent(source(e, mPopCountGraph), mPopCountGraph);
     744        const Kernel * const k = mPipeline[kernelVertex];
    744745        const Binding & b = k->getInputStreamSetBinding(refPort);
    745746        assert (b.getRate().isFixed());
     
    813814    for (const auto e : make_iterator_range(out_edges(bufferVertex, mBufferGraph))) {
    814815        const auto port = mBufferGraph[e].Port;
    815         Kernel * const consumer = mBufferGraph[target(e, mBufferGraph)].Kernel;
     816        const auto kernelVertex = target(e, mBufferGraph);
     817        const Kernel * const consumer = mPipeline[kernelVertex];
    816818        const Binding & b = consumer->getInputStreamSetBinding(port);
    817819        if (LLVM_UNLIKELY(b.hasAttribute(AttrId::RequiresPopCountArray))) {
     
    842844    for (const auto e : make_iterator_range(out_edges(bufferVertex, mBufferGraph))) {
    843845        const auto port = mBufferGraph[e].Port;
    844         Kernel * const consumer = mBufferGraph[target(e, mBufferGraph)].Kernel;
     846        const auto kernelVertex = target(e, mBufferGraph);
     847        Kernel * const consumer = mPipeline[kernelVertex];
    845848        const Binding & input = consumer->getInputStreamSetBinding(port);
    846849        if (input.isDeferred() || !input.getRate().isFixed()) {
  • icGREP/icgrep-devel/icgrep/kernels/pipeline_builder.h

    r6260 r6261  
    2626
    2727    std::shared_ptr<OptimizationBranchBuilder>
    28         CreateOptimizationBranch(StreamSet * const condition,
     28        CreateOptimizationBranch(Relationship * const condition,
    2929                                 Bindings && stream_inputs = {}, Bindings && stream_outputs = {},
    3030                                 Bindings && scalar_inputs = {}, Bindings && scalar_outputs = {});
     
    136136protected:
    137137
    138     OptimizationBranchBuilder(BaseDriver & driver, StreamSet * const condition,
     138    OptimizationBranchBuilder(BaseDriver & driver, Relationship * const condition,
    139139                              Bindings && stream_inputs, Bindings && stream_outputs,
    140140                              Bindings && scalar_inputs, Bindings && scalar_outputs);
     
    143143
    144144private:
    145     StreamSet * const                mCondition;
     145    Relationship * const             mCondition;
    146146    std::unique_ptr<PipelineBuilder> mTrueBranch;
    147147    std::unique_ptr<PipelineBuilder> mFalseBranch;
     
    149149
    150150inline std::shared_ptr<OptimizationBranchBuilder> PipelineBuilder::CreateOptimizationBranch (
    151         StreamSet * const condition,
     151        Relationship * const condition,
    152152        Bindings && stream_inputs, Bindings && stream_outputs,
    153153        Bindings && scalar_inputs, Bindings && scalar_outputs) {
  • icGREP/icgrep-devel/icgrep/kernels/pipeline_kernel.h

    r6253 r6261  
    8181protected:
    8282
    83     PipelineKernel(std::string && signature, const unsigned numOfThreads,
     83    PipelineKernel(const std::unique_ptr<KernelBuilder> & b,
     84                   std::string && signature, const unsigned numOfThreads,
    8485                   Kernels && kernels, CallBindings && callBindings,
    8586                   Bindings && stream_inputs, Bindings && stream_outputs,
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r6184 r6261  
    163163    BasicBlock * radix64_loop = iBuilder->CreateBasicBlock("radix64_loop");
    164164    BasicBlock * fbExit = iBuilder->CreateBasicBlock("fbExit");
    165    
     165
    166166    const unsigned PACK_SIZE = iBuilder->getStride()/8;
    167167    Constant * packSize = iBuilder->getSize(PACK_SIZE);
     
    273273}
    274274
    275 expand3_4Kernel::expand3_4Kernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet *input, StreamSet *expandedOutput)
    276 : MultiBlockKernel("expand3_4",
     275expand3_4Kernel::expand3_4Kernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet *input, StreamSet *expandedOutput)
     276: MultiBlockKernel(b, "expand3_4",
    277277{Binding{"sourceStream", input, FixedRate(3)}},
    278278{Binding{"expand34Stream", expandedOutput, FixedRate(4)}},
     
    281281}
    282282
    283 radix64Kernel::radix64Kernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * input, StreamSet * output)
    284 : BlockOrientedKernel("radix64",
     283radix64Kernel::radix64Kernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * input, StreamSet * output)
     284: BlockOrientedKernel(b, "radix64",
    285285            {Binding{"expandedStream", input}},
    286286            {Binding{"radix64stream", output}},
     
    288288}
    289289
    290 base64Kernel::base64Kernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * input, StreamSet * output)
    291 : BlockOrientedKernel("base64",
     290base64Kernel::base64Kernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * input, StreamSet * output)
     291: BlockOrientedKernel(b, "base64",
    292292{Binding{"radix64stream", input}},
    293293{Binding{"base64stream", output, FixedRate(1), RoundUpTo(4)}},
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r6184 r6261  
    1515namespace kernel {
    1616
    17 /*  expand3_4 transforms a byte sequence by duplicating every third byte. 
    18     Each 3 bytes of the input abc produces a 4 byte output abcc.   
     17/*  expand3_4 transforms a byte sequence by duplicating every third byte.
     18    Each 3 bytes of the input abc produces a 4 byte output abcc.
    1919    This is a useful preparatory transformation in various radix-64 encodings. */
    20  
     20
    2121class expand3_4Kernel final : public MultiBlockKernel {
    22 public:   
    23     expand3_4Kernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * input, StreamSet * expandedOutput);
     22public:
     23    expand3_4Kernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * input, StreamSet * expandedOutput);
    2424    bool isCachable() const override { return true; }
    2525    bool hasSignature() const override { return false; }
  • icGREP/icgrep-devel/icgrep/kernels/random_stream.cpp

    r6120 r6261  
    2323    //
    2424    // The item width (mValueWidth) for the desired random value stream may
    25     // be a single bit or any power of 2.   Determine the number of 32-bit values 
     25    // be a single bit or any power of 2.   Determine the number of 32-bit values
    2626    // returned by rand that are necessary for a full segment of the random
    2727    // value stream.
     
    9797
    9898RandomStreamKernel::RandomStreamKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned seed, unsigned valueWidth, size_t streamLength)
    99 : SegmentOrientedKernel("rand" + std::to_string(valueWidth) + "_" + std::to_string(seed) + "_" + std::to_string(streamLength),
     99: SegmentOrientedKernel(b, "rand" + std::to_string(valueWidth) + "_" + std::to_string(seed) + "_" + std::to_string(streamLength),
    100100// input
    101101{},
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r6236 r6261  
    209209}
    210210
    211 S2PKernel::S2PKernel(const std::unique_ptr<KernelBuilder> &,
     211S2PKernel::S2PKernel(const std::unique_ptr<KernelBuilder> & b,
    212212                     StreamSet * const codeUnitStream,
    213213                     StreamSet * const BasisBits,
    214214                     const cc::BitNumbering numbering,
    215215                     Scalar * signalNullObject)
    216 : MultiBlockKernel((signalNullObject ? "s2pa" : "s2p") + std::to_string(BasisBits->getNumElements()) + cc::numberingSuffix(numbering)
     216: MultiBlockKernel(b, (signalNullObject ? "s2pa" : "s2p") + std::to_string(BasisBits->getNumElements()) + cc::numberingSuffix(numbering)
    217217, {Binding{"byteStream", codeUnitStream, FixedRate(), Principal()}}
    218218, makeOutputBindings(BasisBits, signalNullObject)
     
    244244        const cc::BitNumbering basisNumbering,
    245245        const bool aligned)
    246 : MultiBlockKernel(makeMultiS2PName(outputStreams, basisNumbering, aligned),
     246: MultiBlockKernel(b, makeMultiS2PName(outputStreams, basisNumbering, aligned),
    247247// input
    248248{Binding{"byteStream", codeUnitStream}},
     
    296296
    297297
    298 S2P_21Kernel::S2P_21Kernel(const std::unique_ptr<KernelBuilder> &, StreamSet * const codeUnitStream, StreamSet * const BasisBits, cc::BitNumbering numbering)
    299 : MultiBlockKernel("s2p_21" + cc::numberingSuffix(numbering),
     298S2P_21Kernel::S2P_21Kernel(const std::unique_ptr<KernelBuilder> & b, StreamSet * const codeUnitStream, StreamSet * const BasisBits, cc::BitNumbering numbering)
     299: MultiBlockKernel(b, "s2p_21" + cc::numberingSuffix(numbering),
    300300{Binding{"codeUnitStream", codeUnitStream, FixedRate(), Principal()}},
    301301{Binding{"basisBits", BasisBits}}, {}, {}, {})
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r6216 r6261  
    1919public:
    2020
    21     S2PKernel(const std::unique_ptr<kernel::KernelBuilder> &,
     21    S2PKernel(const std::unique_ptr<kernel::KernelBuilder> &b,
    2222              StreamSet * const codeUnitStream,
    2323              StreamSet * const BasisBits,
     
    5555class S2P_21Kernel final : public MultiBlockKernel {
    5656public:
    57     S2P_21Kernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * const codeUnitStream, StreamSet * const BasisBits, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
     57    S2P_21Kernel(const std::unique_ptr<kernel::KernelBuilder> &b, StreamSet * const codeUnitStream, StreamSet * const BasisBits, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
    5858    bool isCachable() const override { return true; }
    5959    bool hasSignature() const override { return false; }
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r6249 r6261  
    142142            Value * const startPtr = b->getRawInputPointer("InputStream", matchRecordStart);
    143143            Value * const endPtr = b->getRawInputPointer("InputStream", matchRecordEnd);
    144 
    145144            auto argi = dispatcher->arg_begin();
    146145            const auto matchRecNumArg = &*(argi++);
     
    209208
    210209ScanMatchKernel::ScanMatchKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * const Matches, StreamSet * const LineBreakStream, StreamSet * const ByteStream, Scalar * const callbackObject)
    211 : MultiBlockKernel("scanMatch",
     210: MultiBlockKernel(b, "scanMatch",
    212211// inputs
    213212{Binding{"matchResult", Matches, FixedRate(), Principal()}
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp

    r6249 r6261  
    195195    BasicBlock * const expandAndCopyBack = b->CreateBasicBlock("ExpandAndCopyBack");
    196196    const auto blockSize = b->getBitBlockWidth() / 8;
    197     Constant * const blockSizeAlignmentMask = ConstantExpr::getNeg(b->getSize(blockSize));
    198     Value * const consumed = b->getConsumedItemCount("sourceBuffer");
    199     Value * const offset = b->CreateAnd(consumed, blockSizeAlignmentMask);
    200     Value * const unreadData = b->getRawOutputPointer("sourceBuffer", offset);
    201     Value * const remainingItems = b->CreateSub(produced, offset);
     197    Value * const consumedItems = b->getConsumedItemCount("sourceBuffer");
     198    ConstantInt * const BLOCK_WIDTH = b->getSize(b->getBitBlockWidth());
     199    Constant * const ALIGNMENT_MASK = ConstantExpr::getNeg(BLOCK_WIDTH);
     200    Value * const consumed = b->CreateAnd(consumedItems, ALIGNMENT_MASK);
     201    Value * const unreadData = b->getRawOutputPointer("sourceBuffer", consumed);
     202    Value * const remainingItems = b->CreateSub(produced, consumed);
    202203    Value * const potentialItems = b->CreateAdd(remainingItems, itemsToRead);
    203204    Value * const remainingBytes = b->CreateMul(remainingItems, codeUnitBytes);
    204     // Have we consumed enough data that we can safely copy back the unconsumed data and still leave enough space
    205     // for one segment without needing a temporary buffer?
     205    // Have we consumed enough data that we can safely copy back the unconsumed data and still
     206    // leave enough space for one segment without needing a temporary buffer?
    206207    Value * const canCopy = b->CreateICmpULT(b->CreateGEP(baseBuffer, potentialItems), unreadData);
    207208    b->CreateLikelyCondBr(canCopy, copyBack, expandAndCopyBack);
     
    220221    // Free the prior buffer if it exists
    221222    Value * const ancillaryBuffer = b->getScalarField("ancillaryBuffer");
     223    b->setScalarField("ancillaryBuffer", baseBuffer);
    222224    b->CreateFree(ancillaryBuffer);
    223     b->setScalarField("ancillaryBuffer", baseBuffer);
    224225    b->setScalarField("buffer", expandedBuffer);
    225226    b->setCapacity("sourceBuffer", expandedCapacity);
     
    230231    newBaseBuffer->addIncoming(baseBuffer, copyBack);
    231232    newBaseBuffer->addIncoming(expandedBuffer, expandAndCopyBack);
    232     Value * const newBaseAddress = b->CreateGEP(newBaseBuffer, b->CreateNeg(offset));
     233    Value * const newBaseAddress = b->CreateGEP(newBaseBuffer, b->CreateNeg(consumed));
    233234    b->setBaseAddress("sourceBuffer", newBaseAddress);
    234235    b->CreateBr(readData);
     
    272273    BasicBlock * finalizeMMap = b->CreateBasicBlock("finalizeMMap");
    273274    BasicBlock * finalizeDone = b->CreateBasicBlock("finalizeDone");
    274     b->CreateCondBr(b->CreateIsNotNull(b->getScalarField("useMMap")), finalizeMMap, finalizeRead);
     275    Value * const useMMap = b->CreateIsNotNull(b->getScalarField("useMMap"));
     276    b->CreateCondBr(useMMap, finalizeMMap, finalizeRead);
    275277    b->SetInsertPoint(finalizeMMap);
    276278    MMapSourceKernel::freeBuffer(b, mCodeUnitWidth);
     
    284286void FDSourceKernel::generateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
    285287    BasicBlock * initializeRead = b->CreateBasicBlock("initializeRead");
    286     BasicBlock * tryMMap = b->CreateBasicBlock("tryMMap");
     288    BasicBlock * checkFileSize = b->CreateBasicBlock("checkFileSize");
    287289    BasicBlock * initializeMMap = b->CreateBasicBlock("initializeMMap");
    288290    BasicBlock * initializeDone = b->CreateBasicBlock("initializeDone");
     
    291293    // parameter, possibly overridden.
    292294
    293     Value * useMMap = b->getScalarField("useMMap");
     295    Value * const useMMap = b->getScalarField("useMMap");
    294296    Constant * const ZERO = ConstantInt::getNullValue(useMMap->getType());
    295     useMMap = b->CreateICmpNE(useMMap, ZERO);
    296297    // if the fileDescriptor is 0, the file is stdin, use readSource kernel logic.
    297     Value * fd = b->getScalarField("fileDescriptor");
    298     Value * notStdIn = b->CreateICmpNE(fd, b->getInt32(STDIN_FILENO));
    299     useMMap = b->CreateAnd(useMMap, notStdIn);
    300     b->CreateCondBr(useMMap, tryMMap, initializeRead);
    301 
    302     b->SetInsertPoint(tryMMap);
     298    Value * const fd = b->getScalarField("fileDescriptor");
     299    Value * const notStdIn = b->CreateICmpNE(fd, b->getInt32(STDIN_FILENO));
     300    Value * const tryMMap = b->CreateICmpNE(useMMap, ZERO);
     301    b->CreateCondBr(b->CreateAnd(tryMMap, notStdIn), checkFileSize, initializeRead);
     302
     303    b->SetInsertPoint(checkFileSize);
    303304    // If the fileSize is 0, we may have a virtual file such as /proc/cpuinfo
    304     Value * fileSize = b->CreateZExtOrTrunc(b->CreateCall(mFileSizeFunction, fd), b->getSizeTy());
    305     useMMap = b->CreateICmpNE(fileSize, b->getSize(0));
    306     b->CreateCondBr(useMMap, initializeMMap, initializeRead);
     305    Value * const fileSize = b->CreateCall(mFileSizeFunction, fd);
     306    Value * const emptyFile = b->CreateIsNotNull(fileSize);
     307    b->CreateUnlikelyCondBr(emptyFile, initializeRead, initializeMMap);
    307308
    308309    b->SetInsertPoint(initializeMMap);
     
    315316    ReadSourceKernel::generateInitializeMethod(mCodeUnitWidth, mStride,b);
    316317    b->CreateBr(initializeDone);
     318
    317319    b->SetInsertPoint(initializeDone);
    318320}
     
    322324    BasicBlock * DoSegmentMMap = b->CreateBasicBlock("DoSegmentMMap");
    323325    BasicBlock * DoSegmentDone = b->CreateBasicBlock("DoSegmentDone");
    324     b->CreateCondBr(b->CreateTrunc(b->getScalarField("useMMap"), b->getInt1Ty()), DoSegmentMMap, DoSegmentRead);
     326    Value * const useMMap = b->CreateIsNotNull(b->getScalarField("useMMap"));
     327    b->CreateCondBr(useMMap, DoSegmentMMap, DoSegmentRead);
    325328    b->SetInsertPoint(DoSegmentMMap);
    326329    MMapSourceKernel::generateDoSegmentMethod(mCodeUnitWidth, mStride, b);
     
    411414
    412415MMapSourceKernel::MMapSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * const fd, StreamSet * const outputStream)
    413 : SegmentOrientedKernel("mmap_source" + std::to_string(codegen::SegmentSize) + "@" + std::to_string(outputStream->getFieldWidth())
     416: SegmentOrientedKernel(b, "mmap_source" + std::to_string(codegen::SegmentSize) + "@" + std::to_string(outputStream->getFieldWidth())
    414417// input streams
    415418,{}
     
    432435
    433436ReadSourceKernel::ReadSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * const fd, StreamSet * const outputStream)
    434 : SegmentOrientedKernel("read_source" + std::to_string(codegen::SegmentSize) + "@" + std::to_string(outputStream->getFieldWidth())
     437: SegmentOrientedKernel(b, "read_source" + std::to_string(codegen::SegmentSize) + "@" + std::to_string(outputStream->getFieldWidth())
    435438// input streams
    436439,{}
     
    453456
    454457FDSourceKernel::FDSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * const useMMap, Scalar * const fd, StreamSet * const outputStream)
    455 : SegmentOrientedKernel("FD_source" + std::to_string(codegen::SegmentSize) + "@" + std::to_string(outputStream->getFieldWidth())
     458: SegmentOrientedKernel(b, "FD_source" + std::to_string(codegen::SegmentSize) + "@" + std::to_string(outputStream->getFieldWidth())
    456459// input streams
    457460,{}
     
    474477}
    475478
    476 MemorySourceKernel::MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> &, Scalar * fileSource, Scalar * fileItems, StreamSet * const outputStream)
    477 : SegmentOrientedKernel("memory_source" + std::to_string(codegen::SegmentSize) + "@" + std::to_string(outputStream->getFieldWidth()) + ":" + std::to_string(outputStream->getNumElements()),
     479MemorySourceKernel::MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * fileSource, Scalar * fileItems, StreamSet * const outputStream)
     480: SegmentOrientedKernel(b, "memory_source" + std::to_string(codegen::SegmentSize) + "@" + std::to_string(outputStream->getFieldWidth()) + ":" + std::to_string(outputStream->getNumElements()),
    478481// input streams
    479482{},
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.h

    r6184 r6261  
    1414    friend class FDSourceKernel;
    1515public:
    16     MMapSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Scalar * const fd, StreamSet * const outputStream);
     16    MMapSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * const fd, StreamSet * const outputStream);
    1717    bool isCachable() const override { return true; }
    1818    bool hasSignature() const override { return false; }
     
    4242    friend class FDSourceKernel;
    4343public:
    44     ReadSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Scalar * const fd, StreamSet * const outputStream);
     44    ReadSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * const fd, StreamSet * const outputStream);
    4545    bool isCachable() const override { return true; }
    4646    bool hasSignature() const override { return false; }
     
    6464class FDSourceKernel final : public SegmentOrientedKernel {
    6565public:
    66     FDSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Scalar * const useMMap, Scalar * const fd, StreamSet * const outputStream);
     66    FDSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * const useMMap, Scalar * const fd, StreamSet * const outputStream);
    6767    bool isCachable() const override { return true; }
    6868    bool hasSignature() const override { return false; }
     
    7575    llvm::Function * mFileSizeFunction;
    7676};
    77    
     77
    7878class MemorySourceKernel final : public SegmentOrientedKernel {
    7979public:
    80     MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> &, Scalar * fileSource, Scalar * fileItems, StreamSet * const outputStream);
     80    MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * fileSource, Scalar * fileItems, StreamSet * const outputStream);
    8181    bool hasSignature() const override { return false; }
    8282protected:
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r6241 r6261  
    3232
    3333StdOutKernel::StdOutKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * codeUnitBuffer)
    34 : SegmentOrientedKernel("stdout" + std::to_string(codeUnitBuffer->getFieldWidth()),
     34: SegmentOrientedKernel(b, "stdout" + std::to_string(codeUnitBuffer->getFieldWidth()),
    3535// input
    3636{Binding{"codeUnitBuffer", codeUnitBuffer}}
     
    129129
    130130FileSink::FileSink(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * outputFileName, StreamSet * codeUnitBuffer)
    131 : SegmentOrientedKernel("filesink" + std::to_string(codeUnitBuffer->getFieldWidth()),
     131: SegmentOrientedKernel(b, "filesink" + std::to_string(codeUnitBuffer->getFieldWidth()),
    132132// input
    133133{Binding{"codeUnitBuffer", codeUnitBuffer}},
  • icGREP/icgrep-devel/icgrep/kernels/streams_merge.cpp

    r6184 r6261  
    3434}
    3535
    36 StreamsMerge::StreamsMerge(const std::unique_ptr<kernel::KernelBuilder> &, const std::vector<StreamSet *> & inputs, StreamSet * output)
    37 : BlockOrientedKernel(makeKernelName("streamsMerge", inputs, output), {}, {}, {}, {}, {}) {
     36StreamsMerge::StreamsMerge(const std::unique_ptr<kernel::KernelBuilder> & b, const std::vector<StreamSet *> & inputs, StreamSet * output)
     37: BlockOrientedKernel(b, makeKernelName("streamsMerge", inputs, output), {}, {}, {}, {}, {}) {
    3838    for (unsigned i = 0; i < inputs.size(); i++) {
    3939        mInputStreamSets.push_back(Binding{"input" + std::to_string(i), inputs[i]});
     
    6767}
    6868
    69 StreamsIntersect::StreamsIntersect(const std::unique_ptr<kernel::KernelBuilder> &, const std::vector<StreamSet *> & inputs, StreamSet * output)
    70 : BlockOrientedKernel(makeKernelName("streamsIntersect", inputs, output), {}, {}, {}, {}, {}) {
     69StreamsIntersect::StreamsIntersect(const std::unique_ptr<kernel::KernelBuilder> & b, const std::vector<StreamSet *> & inputs, StreamSet * output)
     70: BlockOrientedKernel(b, makeKernelName("streamsIntersect", inputs, output), {}, {}, {}, {}, {}) {
    7171    for (unsigned i = 0; i < inputs.size(); i++) {
    7272        mInputStreamSets.push_back(Binding{"input" + std::to_string(i), inputs[i]});
     
    9999}
    100100
    101 StreamsCombineKernel::StreamsCombineKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder,
     101StreamsCombineKernel::StreamsCombineKernel(const std::unique_ptr<kernel::KernelBuilder> & b,
    102102                                                     std::vector<unsigned> streamsNumOfSets)
    103         : BlockOrientedKernel("StreamsCombineKernel" , {}, {}, {}, {}, {}),
    104           mStreamsNumOfSets(streamsNumOfSets) {
     103: BlockOrientedKernel(b, "StreamsCombineKernel" , {}, {}, {}, {}, {})
     104, mStreamsNumOfSets(streamsNumOfSets) {
    105105    int total = 0;
    106106    for (unsigned i = 0; i < streamsNumOfSets.size(); i++) {
    107107        total += streamsNumOfSets[i];
    108         mInputStreamSets.push_back(Binding{iBuilder->getStreamSetTy(streamsNumOfSets[i], 1), "inputGroup" + std::to_string(i)});
     108        mInputStreamSets.push_back(Binding{b->getStreamSetTy(streamsNumOfSets[i], 1), "inputGroup" + std::to_string(i)});
    109109    }
    110     mOutputStreamSets.push_back(Binding{iBuilder->getStreamSetTy(total, 1), "output"});
     110    mOutputStreamSets.push_back(Binding{b->getStreamSetTy(total, 1), "output"});
    111111}
    112112
     
    130130
    131131
    132 StreamsSplitKernel::StreamsSplitKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder,
     132StreamsSplitKernel::StreamsSplitKernel(const std::unique_ptr<kernel::KernelBuilder> &b,
    133133                                       std::vector<unsigned> streamsNumOfSets)
    134         : BlockOrientedKernel("StreamsSplitKernel" , {}, {}, {}, {}, {}),
    135           mStreamsNumOfSets(streamsNumOfSets){
     134: BlockOrientedKernel(b, "StreamsSplitKernel" , {}, {}, {}, {}, {})
     135, mStreamsNumOfSets(streamsNumOfSets){
    136136    int total = 0;
    137137    for (unsigned i = 0; i < streamsNumOfSets.size(); i++) {
    138138        total += streamsNumOfSets[i];
    139         mOutputStreamSets.push_back(Binding{iBuilder->getStreamSetTy(streamsNumOfSets[i], 1), "outputGroup" + std::to_string(i)});
     139        mOutputStreamSets.push_back(Binding{b->getStreamSetTy(streamsNumOfSets[i], 1), "outputGroup" + std::to_string(i)});
    140140    }
    141     mInputStreamSets.push_back(Binding{iBuilder->getStreamSetTy(total, 1), "input"});
     141    mInputStreamSets.push_back(Binding{b->getStreamSetTy(total, 1), "input"});
    142142}
    143143
  • icGREP/icgrep-devel/icgrep/kernels/streams_merge.h

    r6184 r6261  
    1414
    1515class StreamsMerge : public BlockOrientedKernel {
    16 public:   
     16public:
    1717    StreamsMerge(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const std::vector<StreamSet *> & inputs, StreamSet * output);
    18 protected:   
     18protected:
    1919    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2020};
     
    2222class StreamsCombineKernel : public BlockOrientedKernel {
    2323public:
    24     StreamsCombineKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, std::vector<unsigned> streamsNumOfSets);
     24    StreamsCombineKernel(const std::unique_ptr<kernel::KernelBuilder> & b, std::vector<unsigned> streamsNumOfSets);
    2525protected:
    2626    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     
    3131class StreamsSplitKernel : public BlockOrientedKernel {
    3232public:
    33     StreamsSplitKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, std::vector<unsigned> streamsNumOfSets);
     33    StreamsSplitKernel(const std::unique_ptr<kernel::KernelBuilder> & b, std::vector<unsigned> streamsNumOfSets);
    3434protected:
    3535    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     
    4646
    4747}
    48    
     48
    4949#endif
    5050
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r6258 r6261  
    6868    }
    6969    return b->getSize(count);
     70}
     71
     72/**
     73 * @brief getRawItemPointer
     74 *
     75 * get a raw pointer to the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
     76 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
     77 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
     78 */
     79Value * StreamSetBuffer::getRawItemPointer(IDISA_Builder * const b, Value * absolutePosition) const {
     80    Value * ptr = getBaseAddress(b);
     81    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
     82    const auto bw = elemTy->getPrimitiveSizeInBits();
     83    assert (is_power_2(bw));
     84    if (bw < 8) {
     85        Constant * const fw = ConstantInt::get(absolutePosition->getType(), 8 / bw);
     86        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
     87            b->CreateAssertZero(b->CreateURem(absolutePosition, fw), "absolutePosition must be byte aligned");
     88        }
     89        absolutePosition = b->CreateUDiv(absolutePosition, fw);
     90        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
     91    } else {
     92        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
     93    }
     94    return b->CreateGEP(ptr, absolutePosition);
    7095}
    7196
     
    146171}
    147172
    148 /**
    149  * @brief getRawItemPointer
    150  *
    151  * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
    152  * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
    153  * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
    154  */
    155 Value * ExternalBuffer::getRawItemPointer(IDISA_Builder * const b, Value * const absolutePosition) const {
    156     Value * ptr = getBaseAddress(b);
    157     Value * relativePosition = absolutePosition;
    158     Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
    159     const auto bw = elemTy->getPrimitiveSizeInBits();
    160     assert (is_power_2(bw));
    161     if (bw < 8) {
    162         Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
    163         if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    164             b->CreateAssertZero(b->CreateURem(absolutePosition, fw), "absolutePosition must be byte aligned");
    165         }
    166         relativePosition = b->CreateUDiv(relativePosition, fw);
    167         ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
    168     } else {
    169         ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
    170     }
    171     return b->CreateGEP(ptr, relativePosition);
    172 }
    173 
    174173inline void ExternalBuffer::assertValidBlockIndex(IDISA_Builder * const b, Value * blockIndex) const {
    175174    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     
    271270
    272271Value * StaticBuffer::getRawItemPointer(IDISA_Builder * const b, Value * const absolutePosition) const {
    273     Value * ptr = getBaseAddress(b);
    274     Value * relativePosition = b->CreateURem(absolutePosition, getCapacity(b));
    275     Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
    276     const auto bw = elemTy->getPrimitiveSizeInBits();
    277     assert (is_power_2(bw));
    278     if (bw < 8) {
    279         Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
    280         relativePosition = b->CreateUDiv(relativePosition, fw);
    281         ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
    282     } else {
    283         ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
    284     }
    285     return b->CreateGEP(ptr, relativePosition);
     272    return StreamSetBuffer::getRawItemPointer(b, b->CreateURem(absolutePosition, getCapacity(b)));
    286273}
    287274
     
    391378
    392379Value * DynamicBuffer::getRawItemPointer(IDISA_Builder * const b, Value * absolutePosition) const {
    393     Value * base = getBaseAddress(b);
    394     Value * relativePosition = b->CreateURem(absolutePosition, getCapacity(b));
    395     Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
    396     const auto bw = elemTy->getPrimitiveSizeInBits();
    397     assert (is_power_2(bw));
    398     if (bw < 8) {
    399         Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
    400         relativePosition = b->CreateUDiv(relativePosition, fw);
    401         base = b->CreatePointerCast(base, b->getInt8PtrTy());
    402     } else {
    403         base = b->CreatePointerCast(base, elemTy->getPointerTo());
    404     }
    405     return b->CreateGEP(base, relativePosition);
     380    return StreamSetBuffer::getRawItemPointer(b, b->CreateURem(absolutePosition, getCapacity(b)));
    406381}
    407382
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r6255 r6261  
    9797    virtual llvm::Value * getCapacity(IDISA::IDISA_Builder * const b) const = 0;
    9898
    99     virtual llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * absolutePosition) const = 0;
     99    virtual llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * absolutePosition) const;
    100100
    101101    virtual llvm::Value * getStreamLogicalBasePtr(IDISA::IDISA_Builder * const b, llvm::Value * const streamIndex, llvm::Value * blockIndex) const = 0;
     
    168168    llvm::Value * getOverflowAddress(IDISA::IDISA_Builder * const b) const override;
    169169
    170     llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * absolutePosition) const override;
    171 
    172170private:
    173171
     
    219217    llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * absolutePosition) const override;
    220218
     219    size_t getCapacity() const {
     220        return mCapacity;
     221    }
     222
    221223private:
    222224
     
    257259    size_t getOverflowCapacity(const std::unique_ptr<kernel::KernelBuilder> & b) const override;
    258260
     261    size_t getInitialCapacity() const {
     262        return mInitialCapacity;
     263    }
     264
     265
    259266protected:
    260267
  • icGREP/icgrep-devel/icgrep/kernels/swizzle.cpp

    r6186 r6261  
    7474}
    7575
    76 SwizzleGenerator::SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> &,
     76SwizzleGenerator::SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> & b,
    7777                                   const std::vector<StreamSet *> & inputs,
    7878                                   const std::vector<StreamSet *> & outputs,
    7979                                   const unsigned fieldWidth)
    80 : BlockOrientedKernel(makeSwizzleName(inputs, outputs, fieldWidth),
     80: BlockOrientedKernel(b, makeSwizzleName(inputs, outputs, fieldWidth),
    8181makeSwizzledInputs(inputs),
    8282makeSwizzledOutputs(outputs, fieldWidth),
     
    8888
    8989void SwizzleGenerator::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) {
    90        
     90
    9191    // We may need a few passes depending on the swizzle factor
    9292
     
    107107    Value * targetBlocks[swizzleFactor];
    108108    for (unsigned grp = 0; grp < swizzleGroups; grp++) {
    109         // First load all the data.       
     109        // First load all the data.
    110110        for (unsigned i = 0; i < swizzleFactor; i++) {
    111111            const auto streamNo = grp * swizzleFactor + i;
     
    139139
    140140
    141 SwizzleByGather::SwizzleByGather(const std::unique_ptr<KernelBuilder> &iBuilder)
    142 : BlockOrientedKernel("swizzleByGather", {}, {}, {}, {}, {}){
     141SwizzleByGather::SwizzleByGather(const std::unique_ptr<KernelBuilder> & b)
     142: BlockOrientedKernel(b, "swizzleByGather", {}, {}, {}, {}, {}){
    143143    for (unsigned i = 0; i < 2; i++) {
    144         mInputStreamSets.push_back(Binding{iBuilder->getStreamSetTy(4, 1), "inputGroup" + std::to_string(i)});
     144        mInputStreamSets.push_back(Binding{b->getStreamSetTy(4, 1), "inputGroup" + std::to_string(i)});
    145145    }
    146146    for (unsigned i = 0; i < 1; i++) {
    147         mOutputStreamSets.push_back(Binding{iBuilder->getStreamSetTy(8, 1), "outputGroup" + std::to_string(i), FixedRate(1)});
     147        mOutputStreamSets.push_back(Binding{b->getStreamSetTy(8, 1), "outputGroup" + std::to_string(i), FixedRate(1)});
    148148    }
    149149}
  • icGREP/icgrep-devel/icgrep/kernels/swizzle.h

    r6189 r6261  
    1111
    1212// The SwizzleGenerator class creates a kernel that transforms a set of bit streams into a swizzled form.
    13 // In swizzled form, one "swizzle field" each from a set of streams are grouped together to be processed 
     13// In swizzled form, one "swizzle field" each from a set of streams are grouped together to be processed
    1414// as a unit using SIMD operations.   For example, for a swizzle field width of 64 and a block size of 256,
    1515// 4 streams are swizzled together to be operated on as a group.  The ratio of the block size to the
     
    5151class SwizzleGenerator : public BlockOrientedKernel {
    5252public:
    53    
     53
    5454    SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> &, const std::vector<StreamSet *> & inputs, const std::vector<StreamSet *> & outputs, const unsigned fieldWidth = sizeof(size_t) * 8);
    55    
     55
    5656protected:
    57    
     57
    5858    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    59    
     59
    6060private:
    6161    const unsigned mBitStreamCount;
     
    6666class SwizzleByGather : public BlockOrientedKernel {
    6767public:
    68     SwizzleByGather(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     68    SwizzleByGather(const std::unique_ptr<kernel::KernelBuilder> & b);
    6969
    7070protected:
     
    7474
    7575}
    76    
     76
    7777#endif
    7878
  • icGREP/icgrep-devel/icgrep/kernels/swizzled_multiple_pdep_kernel.cpp

    r6184 r6261  
    1414
    1515SwizzledMultiplePDEPkernel::SwizzledMultiplePDEPkernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned swizzleFactor, const unsigned numberOfStreamSet, std::string name)
    16 : MultiBlockKernel(std::move(name),
     16: MultiBlockKernel(b, std::move(name),
    1717// input stream sets
    1818{Binding{b->getStreamSetTy(), "marker", FixedRate(), Principal()},
  • icGREP/icgrep-devel/icgrep/kernels/until_n.cpp

    r6197 r6261  
    1818void UntilNkernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) {
    1919
    20 /* 
     20/*
    2121   Strategy:  first form an index consisting of one bit per packsize input positions,
    2222   with a 1 bit signifying that the corresponding pack has at least one 1 bit.
     
    2525   found identifies an input pack with a nonzero popcount.  Take the actual popcount
    2626   of the corresponding input pack and update the total number of bits seen.   If
    27    the number of bits seen reaches N with any pack, determine the position of the 
     27   the number of bits seen reaches N with any pack, determine the position of the
    2828   Nth bit and signal termination at that point.
    29  
     29
    3030   For normal processing, we process whole blocks only, always advancing the processed
    3131   and produced item counts by an integral number of blocks.   For final block
     
    179179}
    180180
    181 UntilNkernel::UntilNkernel(const std::unique_ptr<kernel::KernelBuilder> &, Scalar * maxCount, StreamSet * AllMatches, StreamSet * Matches)
    182 : MultiBlockKernel("UntilN",
     181UntilNkernel::UntilNkernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * maxCount, StreamSet * AllMatches, StreamSet * Matches)
     182: MultiBlockKernel(b, "UntilN",
    183183// inputs
    184184{Binding{"bits", AllMatches}},
  • icGREP/icgrep-devel/icgrep/kernels/until_n.h

    r6184 r6261  
    1212class UntilNkernel final : public MultiBlockKernel {
    1313public:
    14     UntilNkernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Scalar * maxCount, StreamSet * AllMatches, StreamSet * Matches);
     14    UntilNkernel(const std::unique_ptr<kernel::KernelBuilder> & b, Scalar * maxCount, StreamSet * AllMatches, StreamSet * Matches);
    1515private:
    1616    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r6228 r6261  
    255255                         std::vector<Binding> scalar_parameters,
    256256                         std::vector<Binding> scalar_outputs)
    257 : BlockOrientedKernel(annotateKernelNameWithPabloDebugFlags(std::move(kernelName)),
     257: BlockOrientedKernel(b, annotateKernelNameWithPabloDebugFlags(std::move(kernelName)),
    258258                      std::move(stream_inputs), std::move(stream_outputs),
    259259                      std::move(scalar_parameters), std::move(scalar_outputs),
  • icGREP/icgrep-devel/icgrep/re/re_nullable.cpp

    r6256 r6261  
    4242    } else if (const Rep* re_rep = dyn_cast<const Rep>(re)) {
    4343        return (re_rep->getLB() == 0) || isNullable(re_rep->getRE());
    44     } else if (const Diff * d = dyn_cast<const Diff>(re)) {
     44    } else if (isa<Diff>(re)) {
    4545        // a Diff of Seq({}) and an Assertion represents a complemented assertion.
    4646        //return isNullable(d->getLH()) && (!isNullable(d->getRH())) && (!isZeroWidth(d->getRH()));
     
    171171    return NullableSuffixRemover().transformRE(r);
    172172}
    173    
     173
    174174}
  • icGREP/icgrep-devel/icgrep/u32u8.cpp

    r6253 r6261  
    9494
    9595UTF8fieldDepositMask::UTF8fieldDepositMask(const std::unique_ptr<KernelBuilder> & b, StreamSet * u32basis, StreamSet * u8fieldMask, StreamSet * u8unitCounts, unsigned depositFieldWidth)
    96 : BlockOrientedKernel("u8depositMask",
     96: BlockOrientedKernel(b, "u8depositMask",
    9797{Binding{"basis", u32basis}},
    9898{Binding{"fieldDepositMask", u8fieldMask, FixedRate(4)},
Note: See TracChangeset for help on using the changeset viewer.