Ignore:
Timestamp:
May 10, 2017, 4:26:11 PM (2 years ago)
Author:
nmedfort
Message:

Large refactoring step. Removed IR generation code from Kernel (formerly KernelBuilder) and moved it into the new KernelBuilder class.

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
42 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp

    r5436 r5440  
    1111namespace kernel {
    1212
    13 inline void ap_p2s_step(IDISA::IDISA_Builder * const iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
     13inline void ap_p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
    1414    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
    1515    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
     
    1818}
    1919
    20 inline void p2s(IDISA::IDISA_Builder * const iBuilder, Value * p[], Value * s[]) {
     20inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
    2121    Value * bit00004444[2];
    2222    Value * bit22226666[2];
     
    3838}
    3939
    40 void PrintableBits::generateDoBlockMethod() {
     40void PrintableBits::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    4141    // Load current block
    42     Value * bitStrmVal = loadInputStreamBlock("bitStream", iBuilder->getInt32(0));
     42    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStream", iBuilder->getInt32(0));
    4343
    4444    Value * bits[8];
     
    8181   
    8282    for (unsigned j = 0; j < 8; ++j) {
    83         storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
    84     }
    85 }
    86 
    87 void SelectStream::generateDoBlockMethod() {
     83        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
     84    }
     85}
     86
     87void SelectStream::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
    8888    if (mStreamIndex >= mSizeInputStreamSet)
    8989        llvm::report_fatal_error("Stream index out of bounds.\n");
    9090   
    91     Value * bitStrmVal = loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
    92 
    93     storeOutputStreamBlock("bitStream", iBuilder->getInt32(0), bitStrmVal);
    94 }
    95 
    96 void PrintStreamSet::generateDoBlockMethod() {
     91    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
     92
     93    iBuilder->storeOutputStreamBlock("bitStream", iBuilder->getInt32(0), bitStrmVal);
     94}
     95
     96void PrintStreamSet::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
    9797
    9898    /*
     
    122122        BasicBlock * entry = iBuilder->GetInsertBlock();
    123123
    124         Value * count = getInputStreamSetCount(name);
     124        Value * count = iBuilder->getInputStreamSetCount(name);
    125125        ConstantInt * const streamLength = iBuilder->getSize(iBuilder->getBitBlockWidth() + mNameWidth + 1);
    126126        Value * output = iBuilder->CreateAlloca(iBuilder->getInt8Ty(), streamLength);
     
    135135
    136136            // Load current block
    137             Value * const input = loadInputStreamBlock(name, iBuilder->getInt32(0));
     137            Value * const input = iBuilder->loadInputStreamBlock(name, iBuilder->getInt32(0));
    138138
    139139            Value * bits[8];
     
    164164            iBuilder->CreateStore(iBuilder->getInt8('['), iBuilder->CreateGEP(output, length));
    165165
    166             BasicBlock * cond = CreateBasicBlock("cond");
    167 
    168             BasicBlock * getIntLength = CreateBasicBlock("getIntLength");
    169 
    170             BasicBlock * writeInt = CreateBasicBlock("writeInt");
    171             BasicBlock * writeVector = CreateBasicBlock("writeVector");
    172 
    173             BasicBlock * exit = CreateBasicBlock("exit");
     166            BasicBlock * cond = iBuilder->CreateBasicBlock("cond");
     167
     168            BasicBlock * getIntLength = iBuilder->CreateBasicBlock("getIntLength");
     169
     170            BasicBlock * writeInt = iBuilder->CreateBasicBlock("writeInt");
     171            BasicBlock * writeVector = iBuilder->CreateBasicBlock("writeVector");
     172
     173            BasicBlock * exit = iBuilder->CreateBasicBlock("exit");
    174174
    175175            ConstantInt * TEN = iBuilder->getSize(10);
     
    217217
    218218            // Load current block
    219             Value * const input = loadInputStreamBlock(name, i);
     219            Value * const input = iBuilder->loadInputStreamBlock(name, i);
    220220
    221221            Value * bits[8];
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.h

    r5436 r5440  
    1212namespace kernel {
    1313
    14 class PrintableBits : public BlockOrientedKernel {
     14class PrintableBits final : public BlockOrientedKernel {
    1515public:
    1616    PrintableBits(const std::unique_ptr<kernel::KernelBuilder> & builder);
    17     virtual ~PrintableBits() {}
    1817private:
    19     void generateDoBlockMethod() override;
     18    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2019};
    2120
    22 class SelectStream : public BlockOrientedKernel {
     21class SelectStream final : public BlockOrientedKernel {
    2322public:
    2423    SelectStream(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned streamIndex);
    25     virtual ~SelectStream() {}
    2624private:
    27     void generateDoBlockMethod() override;
     25    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2826    unsigned mSizeInputStreamSet;
    2927    unsigned mStreamIndex;
    3028};
    3129
    32 class PrintStreamSet : public BlockOrientedKernel {
     30class PrintStreamSet final : public BlockOrientedKernel {
    3331public:
    3432    PrintStreamSet(const std::unique_ptr<kernel::KernelBuilder> & builder, std::vector<std::string> && names, const unsigned minWidth = 16);
    35     virtual ~PrintStreamSet() {}
    3633private:
    37     void generateDoBlockMethod() override;
     34    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    3835private:
    3936    const std::vector<std::string> mNames;
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.cpp

    r5436 r5440  
    2727}
    2828
    29 void DirectCharacterClassKernelBuilder::generateDoBlockMethod() {
     29void DirectCharacterClassKernelBuilder::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    3030    unsigned packCount = 8 * mCodeUnitSize; 
    3131    unsigned codeUnitWidth = 8 * mCodeUnitSize;
    3232    Value * codeUnitPack[packCount];
    3333    for (unsigned i = 0; i < packCount; i++) {
    34         codeUnitPack[i] = loadInputStreamPack("codeUnitStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
     34        codeUnitPack[i] = iBuilder->loadInputStreamPack("codeUnitStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    3535    }
    3636    for (unsigned j = 0; j < mCharClasses.size();  j++) {
     
    6666            theCCstream = iBuilder->simd_or(theCCstream, pack);
    6767        }
    68         storeOutputStreamBlock("ccStream", iBuilder->getInt32(j), theCCstream);
     68        iBuilder->storeOutputStreamBlock("ccStream", iBuilder->getInt32(j), theCCstream);
    6969    }
    7070}
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.h

    r5436 r5440  
    1515class DirectCharacterClassKernelBuilder final : public BlockOrientedKernel {
    1616public:   
    17     DirectCharacterClassKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, std::string ccSetName, std::vector<re::CC *> charClasses, unsigned codeUnitSize);
    18     void generateDoBlockMethod() override;
     17    DirectCharacterClassKernelBuilder(const std::unique_ptr<KernelBuilder> & b, std::string ccSetName, std::vector<re::CC *> charClasses, unsigned codeUnitSize);
     18    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
    1919private:
    2020    const std::vector<re::CC *> mCharClasses;
     
    2525class ParabixCharacterClassKernelBuilder final : public pablo::PabloKernel {
    2626public:
    27     ParabixCharacterClassKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, std::string ccSetName, const std::vector<re::CC *> & charClasses, unsigned codeUnitSize);
     27    ParabixCharacterClassKernelBuilder(const std::unique_ptr<KernelBuilder> & b, std::string ccSetName, const std::vector<re::CC *> & charClasses, unsigned codeUnitSize);
    2828protected:
    2929    void generatePabloMethod() override;
  • icGREP/icgrep-devel/icgrep/kernels/cc_scan_kernel.cpp

    r5436 r5440  
    1414namespace kernel {
    1515
    16 void CCScanKernel::generateDoBlockMethod() {
     16void CCScanKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    1717    auto savePoint = iBuilder->saveIP();
    18     Function * scanWordFunction = generateScanWordRoutine(iBuilder->getModule());
     18    Function * scanWordFunction = generateScanWordRoutine(iBuilder);
    1919    iBuilder->restoreIP(savePoint);
    2020
     
    2222    Type * T = iBuilder->getIntNTy(mScanwordBitWidth);
    2323    VectorType * scanwordVectorType =  VectorType::get(T, fieldCount);   
    24     Value * blockNo = getScalarField("BlockNo");
     24    Value * blockNo = iBuilder->getScalarField("BlockNo");
    2525    Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
    2626   
    2727    std::vector<Value * > matchWordVectors;
    2828    for(unsigned d = 0; d < mStreamNum; d++) {
    29         Value * matches = loadInputStreamBlock("matchResults", iBuilder->getInt32(d));
     29        Value * matches = iBuilder->loadInputStreamBlock("matchResults", iBuilder->getInt32(d));
    3030        matchWordVectors.push_back(iBuilder->CreateBitCast(matches, scanwordVectorType));
    3131    }
     
    3838        scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
    3939    }   
    40     setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getSize(1)));
     40    iBuilder->setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getSize(1)));
    4141}
    4242
    43 Function * CCScanKernel::generateScanWordRoutine(Module * m) const {
     43Function * CCScanKernel::generateScanWordRoutine(const std::unique_ptr<KernelBuilder> & iBuilder) const {
    4444
    4545    IntegerType * T = iBuilder->getIntNTy(mScanwordBitWidth);
     46
     47    Module * const m = iBuilder->getModule();
    4648
    4749    Function * scanFunc = cast<Function>(m->getOrInsertFunction("scan_word", iBuilder->getVoidTy(), T, iBuilder->getInt32Ty(), T, nullptr));
  • icGREP/icgrep-devel/icgrep/kernels/cc_scan_kernel.h

    r5436 r5440  
    1818       
    1919private:
    20     void generateDoBlockMethod() override;
    21     llvm::Function * generateScanWordRoutine(llvm::Module * m) const;
     20    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     21    llvm::Function * generateScanWordRoutine(const std::unique_ptr<KernelBuilder> & iBuilder) const;
    2222       
    2323    unsigned mStreamNum;
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5436 r5440  
    1212namespace kernel {
    1313
    14 inline std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * const iBuilder, const unsigned fw, Value * del_mask) {
     14inline std::vector<Value *> parallel_prefix_deletion_masks(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * del_mask) {
    1515    Value * m = iBuilder->simd_not(del_mask);
    1616    Value * mk = iBuilder->simd_slli(fw, del_mask, 1);
     
    2929}
    3030
    31 inline Value * apply_parallel_prefix_deletion(IDISA::IDISA_Builder * const iBuilder, const unsigned fw, Value * del_mask, const std::vector<Value *> & mv, Value * strm) {
     31inline Value * apply_parallel_prefix_deletion(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * del_mask, const std::vector<Value *> & mv, Value * strm) {
    3232    Value * s = iBuilder->simd_and(strm, iBuilder->simd_not(del_mask));
    3333    for (unsigned i = 0; i < mv.size(); i++) {
     
    3939}
    4040
    41 inline Value * partial_sum_popcount(IDISA::IDISA_Builder * const iBuilder, const unsigned fw, Value * mask) {
     41inline Value * partial_sum_popcount(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * mask) {
    4242    Value * field = iBuilder->simd_popcount(fw, mask);
    4343    const auto count = iBuilder->getBitBlockWidth() / fw;
     
    5252// Outputs: the deleted streams, plus a partial sum popcount
    5353
    54 void DeletionKernel::generateDoBlockMethod() {
    55     Value * delMask = loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
     54void DeletionKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     55    Value * delMask = iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
    5656    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    5757    for (unsigned j = 0; j < mStreamCount; ++j) {
    58         Value * input = loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     58        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    5959        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    60         storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     60        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    6161    }
    6262    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
    63     storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    64 }
    65 
    66 void DeletionKernel::generateFinalBlockMethod(Value * remainingBytes) {
     63    iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
     64}
     65
     66void DeletionKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * remainingBytes) {
    6767    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    6868    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
    6969    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
    70     Value * delMask = iBuilder->CreateOr(EOF_del, loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
     70    Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
    7171    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    7272    for (unsigned j = 0; j < mStreamCount; ++j) {
    73         Value * input = loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     73        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    7474        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    75         storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     75        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    7676    }
    7777    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
    78     storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
     78    iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    7979}
    8080
     
    9292const unsigned PEXT_width = 64;
    9393
    94 inline std::vector<Value *> get_PEXT_masks(IDISA::IDISA_Builder * const iBuilder, Value * del_mask) {
     94inline std::vector<Value *> get_PEXT_masks(const std::unique_ptr<KernelBuilder> & iBuilder, Value * del_mask) {
    9595    Value * m = iBuilder->fwCast(PEXT_width, iBuilder->simd_not(del_mask));
    9696    std::vector<Value *> masks;
     
    103103// Apply PEXT deletion to a collection of blocks and swizzle the result.
    104104// strms contains the blocks to process
    105 inline std::vector<Value *> apply_PEXT_deletion_with_swizzle(IDISA::IDISA_Builder * const iBuilder, const std::vector<Value *> & masks, std::vector<Value *> strms) {
     105inline std::vector<Value *> apply_PEXT_deletion_with_swizzle(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks, std::vector<Value *> strms) {
    106106    Value * PEXT_func = nullptr;
    107107    if (PEXT_width == 64) {
     
    146146}
    147147
    148 inline Value * apply_PEXT_deletion(IDISA::IDISA_Builder * const iBuilder, const std::vector<Value *> & masks, Value * strm) {
     148inline Value * apply_PEXT_deletion(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks, Value * strm) {
    149149    Value * PEXT_func = nullptr;
    150150    if (PEXT_width == 64) {
     
    168168// Outputs: swizzles containing the swizzled deleted streams, plus a partial sum popcount
    169169
    170 void DeleteByPEXTkernel::generateDoBlockMethod() {
    171     Value * delMask = loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
     170void DeleteByPEXTkernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     171    Value * delMask = iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
    172172    const auto masks = get_PEXT_masks(iBuilder, delMask);
    173     generateProcessingLoop(masks, delMask);
    174 }
    175 
    176 void DeleteByPEXTkernel::generateFinalBlockMethod(Value * remainingBytes) {
     173    generateProcessingLoop(iBuilder, masks, delMask);
     174}
     175
     176void DeleteByPEXTkernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder, Value * remainingBytes) {
    177177    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    178178    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
    179179    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
    180     Value * delMask = iBuilder->CreateOr(EOF_del, loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
     180    Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
    181181    const auto masks = get_PEXT_masks(iBuilder, delMask);
    182     generateProcessingLoop(masks, delMask);
    183 }
    184 
    185 void DeleteByPEXTkernel::generateProcessingLoop(const std::vector<Value *> & masks, Value * delMask) {
    186     if (mShouldSwizzle)    
    187         generatePEXTAndSwizzleLoop(masks);
    188     else
    189         generatePEXTLoop(masks);   
    190    
     182    generateProcessingLoop(iBuilder, masks, delMask);
     183}
     184
     185void DeleteByPEXTkernel::generateProcessingLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks, Value * delMask) {
     186    if (mShouldSwizzle) {
     187        generatePEXTAndSwizzleLoop(iBuilder, masks);
     188    } else {
     189        generatePEXTLoop(iBuilder, masks);
     190    }
    191191    //Value * delCount = partial_sum_popcount(iBuilder, mDelCountFieldWidth, apply_PEXT_deletion(iBuilder, masks, iBuilder->simd_not(delMask)));
    192192    Value * delCount = iBuilder->simd_popcount(mDelCountFieldWidth, iBuilder->simd_not(delMask));
    193     storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    194 }
    195 
    196 void DeleteByPEXTkernel::generatePEXTLoop(const std::vector<Value *> & masks) {
     193    iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
     194}
     195
     196void DeleteByPEXTkernel::generatePEXTLoop(const std::unique_ptr<KernelBuilder> &iBuilder, const std::vector<Value *> & masks) {
    197197    for (unsigned j = 0; j < mStreamCount; ++j) {
    198         Value * input = loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     198        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    199199        Value * output = apply_PEXT_deletion(iBuilder, masks, input);
    200         storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    201     }
    202 }
    203 
    204 void DeleteByPEXTkernel::generatePEXTAndSwizzleLoop(const std::vector<Value *> & masks) {
     200        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     201    }
     202}
     203
     204void DeleteByPEXTkernel::generatePEXTAndSwizzleLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks) {
    205205    // Group blocks together into input vector. Input should contain mStreamCount/mSwizzleFactor blocks (e.g. for U8U16 16/4=4)
    206206    // mStreamCount/mSwizzleFactor -> (mStreamCount + mSwizzleFactor - 1) / mSwizzleFactor
     
    210210        for (unsigned i = streamSelectionIndex; i < (streamSelectionIndex + mSwizzleFactor); ++i) {
    211211                // Check if i > mStreamCount. If it is, add null streams until we get mStreamCount/mSwizzleFactor streams in the input vector
    212             if ( i >= mStreamCount)
     212            if ( i >= mStreamCount) {
    213213                                input.push_back(iBuilder->allZeroes());
    214                         else
    215                         input.push_back(loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(i)));
     214            } else {
     215                input.push_back(iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(i)));
     216            }
    216217        }
    217218        std::vector<Value *> output = apply_PEXT_deletion_with_swizzle(iBuilder, masks, input);
    218219        for (unsigned i = 0; i < mSwizzleFactor; i++) {
    219              storeOutputStreamBlock(std::string(mOutputSwizzleNameBase) + std::to_string(j), iBuilder->getInt32(i), output[i]);
     220             iBuilder->storeOutputStreamBlock(std::string(mOutputSwizzleNameBase) + std::to_string(j), iBuilder->getInt32(i), output[i]);
    220221        }
    221222    }
     
    280281}
    281282   
    282 void SwizzledBitstreamCompressByCount::generateDoBlockMethod() {
     283void SwizzledBitstreamCompressByCount::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    283284       
    284     Value * countStreamPtr = iBuilder->CreateBitCast(getInputStreamBlockPtr("countsPerStride", iBuilder->getInt32(0)), iBuilder->getIntNTy(mFieldWidth)->getPointerTo());
     285    Value * countsPerStridePtr = iBuilder->getInputStreamBlockPtr("countsPerStride", iBuilder->getInt32(0));
     286    Value * countStreamPtr = iBuilder->CreatePointerCast(countsPerStridePtr, iBuilder->getIntNTy(mFieldWidth)->getPointerTo());
    285287   
    286288    // Output is written and committed to the output buffer one swizzle at a time.
     
    289291    Constant * outputIndexShift = iBuilder->getSize(std::log2(mFieldWidth));
    290292   
    291     Value * outputProduced = getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
     293    Value * outputProduced = iBuilder->getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
    292294    Value * producedOffset = iBuilder->CreateAnd(outputProduced, blockOffsetMask);
    293295    Value * outputIndex = iBuilder->CreateLShr(producedOffset, outputIndexShift);
    294296
    295297    // There may be pending data in the kernel state, for up to mFieldWidth-1 bits per stream.
    296     Value * pendingOffset = getScalarField("pendingOffset");
     298    Value * pendingOffset = iBuilder->getScalarField("pendingOffset");
    297299    // There is a separate vector of pending data for each swizzle group.
    298300    std::vector<Value *> pendingData;
    299301    std::vector<Value *> outputStreamPtr;
    300302    for (unsigned i = 0; i < mSwizzleSetCount; i++) {
    301         pendingData.push_back(getScalarField("pendingSwizzleData" + std::to_string(i)));
    302         outputStreamPtr.push_back(getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0)));
     303        pendingData.push_back(iBuilder->getScalarField("pendingSwizzleData" + std::to_string(i)));
     304        outputStreamPtr.push_back(iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0)));
    303305    }
    304306   
     
    313315        // according to the same newItemCount, pendingSpace, ...
    314316        for (unsigned j = 0; j < mSwizzleSetCount; j++) {
    315             Value * newItems = loadInputStreamBlock("inputSwizzle" + std::to_string(j), iBuilder->getInt32(i));
     317            Value * newItems = iBuilder->loadInputStreamBlock("inputSwizzle" + std::to_string(j), iBuilder->getInt32(i));
    316318            // Combine as many of the new items as possible into the pending group.
    317319            Value * combinedGroup = iBuilder->CreateOr(pendingData[j], iBuilder->CreateShl(newItems, iBuilder->simd_fill(mFieldWidth, pendingOffset)));
     
    327329        pendingOffset = iBuilder->CreateAnd(iBuilder->CreateAdd(newItemCount, pendingOffset), iBuilder->getSize(mFieldWidth-1));
    328330    }
    329     setScalarField("pendingOffset", pendingOffset);
     331    iBuilder->setScalarField("pendingOffset", pendingOffset);
    330332   
    331333    Value * newlyProduced = iBuilder->CreateSub(iBuilder->CreateShl(outputIndex, outputIndexShift), producedOffset);
    332334    Value * produced = iBuilder->CreateAdd(outputProduced, newlyProduced);
    333335    for (unsigned j = 0; j < mSwizzleSetCount; j++) {
    334         setScalarField("pendingSwizzleData" + std::to_string(j), pendingData[j]);
    335     }
    336     setProducedItemCount("outputSwizzle0", produced);
    337 }
    338 
    339 void SwizzledBitstreamCompressByCount::generateFinalBlockMethod(Value * remainingBytes) {
    340     CreateDoBlockMethodCall();
     336        iBuilder->setScalarField("pendingSwizzleData" + std::to_string(j), pendingData[j]);
     337    }
     338    iBuilder->setProducedItemCount("outputSwizzle0", produced);
     339}
     340
     341void SwizzledBitstreamCompressByCount::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * /* remainingBytes */) {
     342    CreateDoBlockMethodCall(iBuilder);
    341343    Constant * blockOffsetMask = iBuilder->getSize(iBuilder->getBitBlockWidth() - 1);
    342344    Constant * outputIndexShift = iBuilder->getSize(std::log2(mFieldWidth));
    343345   
    344     Value * outputProduced = getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
     346    Value * outputProduced = iBuilder->getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
    345347    Value * producedOffset = iBuilder->CreateAnd(outputProduced, blockOffsetMask);
    346348    Value * outputIndex = iBuilder->CreateLShr(producedOffset, outputIndexShift);
    347     Value * pendingOffset = getScalarField("pendingOffset");
     349    Value * pendingOffset = iBuilder->getScalarField("pendingOffset");
    348350
    349351    // Write the pending data.
    350352    for (unsigned i = 0; i < mSwizzleSetCount; i++) {
    351         Value * pendingData = getScalarField("pendingSwizzleData" + std::to_string(i));
    352         Value * outputStreamPtr = getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0));
     353        Value * pendingData = iBuilder->getScalarField("pendingSwizzleData" + std::to_string(i));
     354        Value * outputStreamPtr = iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0));
    353355        iBuilder->CreateBlockAlignedStore(pendingData, iBuilder->CreateGEP(outputStreamPtr, outputIndex));
    354356    }
    355     setProducedItemCount("outputSwizzle0", iBuilder->CreateAdd(pendingOffset, outputProduced));
    356 }
    357 }
     357    iBuilder->setProducedItemCount("outputSwizzle0", iBuilder->CreateAdd(pendingOffset, outputProduced));
     358}
     359}
  • icGREP/icgrep-devel/icgrep/kernels/deletion.h

    r5436 r5440  
    2929    bool moduleIDisSignature() const override { return true; }
    3030protected:
    31     void generateDoBlockMethod() override;
    32     void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
     31    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     32    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) override;
    3333private:
    3434    const unsigned mDeletionFieldWidth;
     
    4242    bool moduleIDisSignature() const override { return true; }
    4343protected:
    44     void generateDoBlockMethod() override;
    45     void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
    46     void generatePEXTAndSwizzleLoop(const std::vector<llvm::Value *> & masks);
    47     void generatePEXTLoop(const std::vector<llvm::Value *> & masks);
    48     void generateProcessingLoop(const std::vector<llvm::Value *> & masks, llvm::Value * delMask);   
     44    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     45    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) override;
     46    void generatePEXTAndSwizzleLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<llvm::Value *> & masks);
     47    void generatePEXTLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<llvm::Value *> & masks);
     48    void generateProcessingLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<llvm::Value *> & masks, llvm::Value * delMask);
    4949private:
    5050    const unsigned mDelCountFieldWidth;
     
    6161    bool moduleIDisSignature() const override { return true; }
    6262protected:
    63     void generateDoBlockMethod() override;
    64     void generateFinalBlockMethod(llvm::Value * remainingBytes) override;   
     63    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     64    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) override;
    6565private:
    6666    const unsigned mBitStreamCount;
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.cpp

    r5436 r5440  
    1111namespace kernel {
    1212
    13 void EvenOddKernel::generateDoBlockMethod() {
     13void EvenOddKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    1414    Value * even = iBuilder->simd_fill(64, iBuilder->getInt64(0x5555555555555555));
    1515    Value * odd = iBuilder->bitCast(iBuilder->simd_fill(8, iBuilder->getInt8(0xAA)));
    16     storeOutputStreamBlock("even_odd", iBuilder->getInt32(0), even);
    17     storeOutputStreamBlock("even_odd", iBuilder->getInt32(1), odd);
     16    iBuilder->storeOutputStreamBlock("even_odd", iBuilder->getInt32(0), even);
     17    iBuilder->storeOutputStreamBlock("even_odd", iBuilder->getInt32(1), odd);
    1818}
    1919
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.h

    r5436 r5440  
    1212namespace kernel {
    1313
    14 class EvenOddKernel : public BlockOrientedKernel {
     14class EvenOddKernel final : public BlockOrientedKernel {
    1515public:
    1616    EvenOddKernel(const std::unique_ptr<kernel::KernelBuilder> & builder);
    17     virtual ~EvenOddKernel() {}
    1817private:
    19     void generateDoBlockMethod() override;
    20 
     18    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2119};
    2220
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5436 r5440  
    4040}
    4141
    42 std::string ICgrepKernelBuilder::makeSignature() {
     42std::string ICgrepKernelBuilder::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
    4343    return mSignature;
    4444}
     
    4848}
    4949
    50 void InvertMatchesKernel::generateDoBlockMethod() {
    51     Value * input = loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
    52     Value * lbs = loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
     50void InvertMatchesKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     51    Value * input = iBuilder->loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
     52    Value * lbs = iBuilder->loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
    5353    Value * inverted = iBuilder->CreateXor(input, lbs);
    54     storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
     54    iBuilder->storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
    5555}
    5656
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r5436 r5440  
    1515public:
    1616    ICgrepKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const re_ast);   
    17     std::string makeSignature() override;
     17    std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    1818    bool isCachable() const override { return true; }
    1919protected:
     
    2828    InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder);
    2929private:
    30     void generateDoBlockMethod() override;
     30    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    3131};
    3232
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5438 r5440  
    126126}
    127127
    128 void KernelInterface::addKernelDeclarations() {
     128void KernelInterface::addKernelDeclarations(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    129129
    130130    if (mKernelStateType == nullptr) {
     
    132132    }
    133133
    134     Module * const module = iBuilder->getModule();
     134    Module * const module = idb->getModule();
    135135    PointerType * const selfType = mKernelStateType->getPointerTo();
    136     IntegerType * const sizeTy = iBuilder->getSizeTy();
     136    IntegerType * const sizeTy = idb->getSizeTy();
    137137    PointerType * const consumerTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
    138     Type * const voidTy = iBuilder->getVoidTy();
     138    Type * const voidTy = idb->getVoidTy();
    139139
    140140    // Create the initialization function prototype
     
    155155    }
    156156    for (auto binding : mStreamSetOutputs) {
    157         (args++)->setName(binding.name + "ConsumerLocks");
     157        (++args)->setName(binding.name + "ConsumerLocks");
    158158    }
    159159
    160160    // Create the doSegment function prototype.
    161     std::vector<Type *> params = {selfType, iBuilder->getInt1Ty()};
     161    std::vector<Type *> params = {selfType, idb->getInt1Ty()};
    162162    params.insert(params.end(), mStreamSetInputs.size(), sizeTy);
    163163
     
    177177    Type * resultType = nullptr;
    178178    if (mScalarOutputs.empty()) {
    179         resultType = iBuilder->getVoidTy();
     179        resultType = idb->getVoidTy();
    180180    } else {
    181181        const auto n = mScalarOutputs.size();
     
    187187            resultType = outputType[0];
    188188        } else {
    189             resultType = StructType::get(iBuilder->getContext(), ArrayRef<Type *>(outputType, n));
     189            resultType = StructType::get(idb->getContext(), ArrayRef<Type *>(outputType, n));
    190190        }
    191191    }
     
    198198    args->setName("self");
    199199
    200     linkExternalMethods();
     200    linkExternalMethods(idb);
    201201}
    202202
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5438 r5440  
    9292    virtual bool isCachable() const = 0;
    9393
    94     virtual std::string makeSignature() = 0;
     94    virtual std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
    9595
    9696    const std::vector<Binding> & getStreamInputs() const {
     
    127127
    128128    // Add ExternalLinkage method declarations for the kernel to a given client module.
    129     void addKernelDeclarations();
    130 
    131     virtual void linkExternalMethods() = 0;
    132 
    133     virtual llvm::Value * createInstance() = 0;
    134 
    135     virtual void initializeInstance() = 0;
    136 
    137     virtual void finalizeInstance() = 0;
     129    void addKernelDeclarations(const std::unique_ptr<kernel::KernelBuilder> & idb);
     130
     131    virtual void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
     132
     133    virtual llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
     134
     135    virtual void initializeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
     136
     137    virtual void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
    138138
    139139    void setInitialArguments(std::vector<llvm::Value *> && args) {
     
    145145    }
    146146
     147    void setInstance(llvm::Value * const instance) {
     148        assert ("kernel instance cannot be null!" && instance);
     149        assert ("kernel instance must point to a valid kernel state type!" && (instance->getType()->getPointerElementType() == mKernelStateType));
     150        mKernelInstance = instance;
     151    }
     152
    147153    unsigned getLookAhead() const {
    148154        return mLookAheadPositions;
     
    153159    }
    154160
    155     kernel::KernelBuilder * getBuilder() const {
    156         return iBuilder;
    157     }
    158 
    159     void setBuilder(const std::unique_ptr<kernel::KernelBuilder> & builder) {
    160         iBuilder = builder.get();
    161     }
    162 
    163161protected:
    164 
    165     virtual llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const = 0;
    166 
    167     virtual void setProducedItemCount(const std::string & name, llvm::Value * value) const = 0;
    168 
    169     virtual llvm::Value * getProcessedItemCount(const std::string & name) const = 0;
    170 
    171     virtual void setProcessedItemCount(const std::string & name, llvm::Value * value) const = 0;
    172 
    173     virtual llvm::Value * getConsumedItemCount(const std::string & name) const = 0;
    174 
    175     virtual void setConsumedItemCount(const std::string & name, llvm::Value * value) const = 0;
    176 
    177     virtual llvm::Value * getTerminationSignal() const = 0;
    178 
    179     virtual void setTerminationSignal() const = 0;
    180162
    181163    llvm::Function * getInitFunction(llvm::Module * const module) const;
     
    191173                    std::vector<Binding> && scalar_outputs,
    192174                    std::vector<Binding> && internal_scalars)
    193     : iBuilder(nullptr)
     175    : mKernelInstance(nullptr)
    194176    , mModule(nullptr)
    195     , mKernelInstance(nullptr)
    196177    , mKernelStateType(nullptr)
    197178    , mLookAheadPositions(0)
     
    201182    , mScalarInputs(scalar_inputs)
    202183    , mScalarOutputs(scalar_outputs)
    203     , mInternalScalars(internal_scalars)
    204     {
     184    , mInternalScalars(internal_scalars) {
    205185
    206186    }
    207187   
    208     void setInstance(llvm::Value * const instance) {
    209         assert ("kernel instance cannot be null!" && instance);
    210         assert ("kernel instance must point to a valid kernel state type!" && (instance->getType()->getPointerElementType() == mKernelStateType));
    211         mKernelInstance = instance;
    212     }
    213 
    214188protected:
    215    
    216     kernel::KernelBuilder *                 iBuilder;
     189
     190    llvm::Value *                           mKernelInstance;
    217191    llvm::Module *                          mModule;
    218 
    219     llvm::Value *                           mKernelInstance;
    220192    llvm::StructType *                      mKernelStateType;
    221193    unsigned                                mLookAheadPositions;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5439 r5440  
    5757}
    5858
    59 // Get the value of a scalar field for the current instance.
    60 llvm::Value * Kernel::getScalarFieldPtr(llvm::Value * index) const {
    61     return iBuilder->CreateGEP(getInstance(), {iBuilder->getInt32(0), index});
    62 }
    63 
    64 llvm::Value * Kernel::getScalarFieldPtr(const std::string & fieldName) const {
    65     return getScalarFieldPtr(iBuilder->getInt32(getScalarIndex(fieldName)));
    66 }
    67 
    68 llvm::Value * Kernel::getScalarField(const std::string & fieldName) const {
    69     return iBuilder->CreateLoad(getScalarFieldPtr(fieldName), fieldName);
    70 }
    71 
    72 // Set the value of a scalar field for the current instance.
    73 void Kernel::setScalarField(const std::string & fieldName, llvm::Value * value) const {
    74     iBuilder->CreateStore(value, getScalarFieldPtr(fieldName));
    75 }
    76 
    7759void Kernel::prepareStreamSetNameMap() {
    7860    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     
    8466}
    8567   
    86 void Kernel::prepareKernel() {
    87     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     68void Kernel::createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
     69    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
     70    assert ("IDISA Builder does not have a valid Module" && idb->getModule());
     71    std::stringstream cacheName;   
     72    cacheName << getName() << '_' << idb->getBuilderUniqueName();
     73    for (const StreamSetBuffer * b: inputs) {
     74        cacheName <<  ':' <<  b->getUniqueID();
     75    }
     76    for (const StreamSetBuffer * b: outputs) {
     77        cacheName <<  ':' <<  b->getUniqueID();
     78    }
     79    Module * const kernelModule = new Module(cacheName.str(), idb->getContext());
     80    createKernelStub(idb, inputs, outputs, kernelModule);
     81}
     82
     83void Kernel::createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, Module * const kernelModule) {
     84    assert (mModule == nullptr);
     85    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
     86    assert (mStreamSetInputBuffers.empty());
     87    assert (mStreamSetOutputBuffers.empty());
     88
     89    if (LLVM_UNLIKELY(mStreamSetInputs.size() != inputs.size())) {
     90        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
     91                           " input stream sets but was given "
     92                           + std::to_string(inputs.size()));
     93    }
     94
     95    for (unsigned i = 0; i < inputs.size(); ++i) {
     96        StreamSetBuffer * const buf = inputs[i];
     97        if (LLVM_UNLIKELY(buf == nullptr)) {
     98            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
     99                               + " cannot be null");
     100        }
     101        buf->addConsumer(this);
     102    }
     103
     104    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != outputs.size())) {
     105        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
     106                           + " output stream sets but was given "
     107                           + std::to_string(outputs.size()));
     108    }
     109
     110    for (unsigned i = 0; i < outputs.size(); ++i) {
     111        StreamSetBuffer * const buf = outputs[i];
     112        if (LLVM_UNLIKELY(buf == nullptr)) {
     113            report_fatal_error(getName() + ": output stream set " + std::to_string(i) + " cannot be null");
     114        }
     115        if (LLVM_LIKELY(buf->getProducer() == nullptr)) {
     116            buf->setProducer(this);
     117        } else {
     118            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
     119                               + " is already produced by kernel " + buf->getProducer()->getName());
     120        }
     121    }
     122
     123    mModule = kernelModule;
     124    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
     125    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
     126    prepareKernel(idb);
     127}
     128
     129void Kernel::prepareKernel(const std::unique_ptr<KernelBuilder> & idb) {
     130    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    88131    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    89132        report_fatal_error("Cannot prepare kernel after kernel state finalized");
     
    103146        report_fatal_error(out.str());
    104147    }
    105     const auto blockSize = iBuilder->getBitBlockWidth();
     148    const auto blockSize = idb->getBitBlockWidth();
    106149    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    107150        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
     
    110153        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
    111154        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
    112             addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
    113         }       
    114     }
    115 
    116     IntegerType * const sizeTy = iBuilder->getSizeTy();
     155            addScalar(idb->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
     156        }
     157    }
     158
     159    IntegerType * const sizeTy = idb->getSizeTy();
    117160    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    118161        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
     
    140183
    141184    addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
    142     addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
     185    addScalar(idb->getInt1Ty(), TERMINATION_SIGNAL);
    143186
    144187    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     
    146189    }
    147190
    148     mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
    149 }
    150 
    151 void Kernel::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    152     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
    153     assert ("IDISA Builder does not have a valid Module" && iBuilder->getModule());
    154     std::stringstream cacheName;   
    155     cacheName << getName() << '_' << iBuilder->getBuilderUniqueName();
    156     for (const StreamSetBuffer * b: inputs) {
    157         cacheName <<  ':' <<  b->getUniqueID();
    158     }
    159     for (const StreamSetBuffer * b: outputs) {
    160         cacheName <<  ':' <<  b->getUniqueID();
    161     }
    162     Module * const kernelModule = new Module(cacheName.str(), iBuilder->getContext());
    163     kernelModule->setTargetTriple(iBuilder->getModule()->getTargetTriple());
    164     createKernelStub(inputs, outputs, kernelModule);
    165 }
    166 
    167 void Kernel::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, Module * const kernelModule) {
    168     assert (mModule == nullptr);
    169     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
    170     assert (mStreamSetInputBuffers.empty());
    171     assert (mStreamSetOutputBuffers.empty());
    172 
    173     if (LLVM_UNLIKELY(mStreamSetInputs.size() != inputs.size())) {
    174         report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
    175                            " input stream sets but was given "
    176                            + std::to_string(inputs.size()));
    177     }
    178 
    179     for (unsigned i = 0; i < inputs.size(); ++i) {
    180         StreamSetBuffer * const buf = inputs[i];
    181         if (LLVM_UNLIKELY(buf == nullptr)) {
    182             report_fatal_error(getName() + ": input stream set " + std::to_string(i)
    183                                + " cannot be null");
    184         }
    185         buf->addConsumer(this);
    186     }
    187 
    188     if (LLVM_UNLIKELY(mStreamSetOutputs.size() != outputs.size())) {
    189         report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
    190                            + " output stream sets but was given "
    191                            + std::to_string(outputs.size()));
    192     }
    193 
    194     for (unsigned i = 0; i < outputs.size(); ++i) {
    195         StreamSetBuffer * const buf = outputs[i];
    196         if (LLVM_UNLIKELY(buf == nullptr)) {
    197             report_fatal_error(getName() + ": output stream set " + std::to_string(i) + " cannot be null");
    198         }
    199         if (LLVM_LIKELY(buf->getProducer() == nullptr)) {
    200             buf->setProducer(this);
    201         } else {
    202             report_fatal_error(getName() + ": output stream set " + std::to_string(i)
    203                                + " is already produced by kernel " + buf->getProducer()->getName());
    204         }
    205     }
    206 
    207     mModule = kernelModule;
    208 
    209     mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
    210     mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
    211 
    212     prepareKernel();
    213 }
    214 
     191    mKernelStateType = StructType::create(idb->getContext(), mKernelFields, getName());
     192}
    215193
    216194// Default kernel signature: generate the IR and emit as byte code.
    217 std::string Kernel::makeSignature() {
    218     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     195std::string Kernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     196    assert ("KernelBuilder does not have a valid IDISA Builder" && idb.get());
    219197    if (LLVM_LIKELY(moduleIDisSignature())) {
    220198        return getModule()->getModuleIdentifier();
    221199    } else {
    222         generateKernel();
     200        generateKernel(idb);
    223201        std::string signature;
    224202        raw_string_ostream OS(signature);
     
    228206}
    229207
    230 void Kernel::generateKernel() {
    231     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     208void Kernel::generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     209    assert ("KernelBuilder does not have a valid IDISA Builder" && idb.get());
    232210    // If the module id cannot uniquely identify this kernel, "generateKernelSignature()" will have already
    233211    // generated the unoptimized IR.
    234212    if (!mIsGenerated) {
    235         auto ip = iBuilder->saveIP();
    236         auto saveInstance = getInstance();
    237         addKernelDeclarations();
    238         callGenerateInitializeMethod();
    239         callGenerateDoSegmentMethod();       
    240         callGenerateFinalizeMethod();
     213        const auto m = idb->getModule();
     214        const auto ip = idb->saveIP();
     215        const auto saveInstance = getInstance();
     216        idb->setModule(mModule);
     217        addKernelDeclarations(idb);
     218        callGenerateInitializeMethod(idb);
     219        callGenerateDoSegmentMethod(idb);
     220        callGenerateFinalizeMethod(idb);
    241221        setInstance(saveInstance);
    242         iBuilder->restoreIP(ip);
     222        idb->setModule(m);
     223        idb->restoreIP(ip);
    243224        mIsGenerated = true;
    244225    }
    245226}
    246227
    247 inline void Kernel::callGenerateInitializeMethod() {
    248     mCurrentMethod = getInitFunction(iBuilder->getModule());
    249     iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     228inline void Kernel::callGenerateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     229    mCurrentMethod = getInitFunction(idb->getModule());
     230    idb->SetInsertPoint(BasicBlock::Create(idb->getContext(), "entry", mCurrentMethod));
    250231    Function::arg_iterator args = mCurrentMethod->arg_begin();
    251232    setInstance(&*(args++));
    252     iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), getInstance());
     233    idb->CreateStore(ConstantAggregateZero::get(mKernelStateType), getInstance());
    253234    for (const auto & binding : mScalarInputs) {
    254         setScalarField(binding.name, &*(args++));
     235        idb->setScalarField(binding.name, &*(args++));
    255236    }
    256237    for (const auto & binding : mStreamSetOutputs) {
    257         setConsumerLock(binding.name, &*(args++));
    258     }
    259     generateInitializeMethod();
    260     iBuilder->CreateRetVoid();
    261 }
    262 
    263 inline void Kernel::callGenerateDoSegmentMethod() {
    264     mCurrentMethod = getDoSegmentFunction(iBuilder->getModule());
    265     BasicBlock * const entry = CreateBasicBlock(getName() + "_entry");
    266     iBuilder->SetInsertPoint(entry);
     238        idb->setConsumerLock(binding.name, &*(args++));
     239    }
     240    generateInitializeMethod(idb);
     241    idb->CreateRetVoid();
     242}
     243
     244inline void Kernel::callGenerateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     245    mCurrentMethod = getDoSegmentFunction(idb->getModule());
     246    idb->SetInsertPoint(BasicBlock::Create(idb->getContext(), "entry", mCurrentMethod));
    267247    auto args = mCurrentMethod->arg_begin();
    268248    setInstance(&*(args++));
     
    273253        mAvailableItemCount[i] = &*(args++);
    274254    }
    275     generateDoSegmentMethod(); // must be overridden by the KernelBuilder subtype
     255    generateDoSegmentMethod(idb); // must be overridden by the KernelBuilder subtype
    276256    mIsFinal = nullptr;
    277257    mAvailableItemCount.clear();
    278     iBuilder->CreateRetVoid();
    279 }
    280 
    281 inline void Kernel::callGenerateFinalizeMethod() {
    282     mCurrentMethod = getTerminateFunction(iBuilder->getModule());
    283     iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     258    idb->CreateRetVoid();
     259}
     260
     261inline void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb) {
     262    mCurrentMethod = getTerminateFunction(idb->getModule());
     263    idb->SetInsertPoint(BasicBlock::Create(idb->getContext(), "entry", mCurrentMethod));
    284264    auto args = mCurrentMethod->arg_begin();
    285265    setInstance(&*(args++));
    286     generateFinalizeMethod(); // may be overridden by the KernelBuilder subtype
     266    generateFinalizeMethod(idb); // may be overridden by the KernelBuilder subtype
    287267    const auto n = mScalarOutputs.size();
    288268    if (n == 0) {
    289         iBuilder->CreateRetVoid();
     269        idb->CreateRetVoid();
    290270    } else {
    291271        Value * outputs[n];
    292272        for (unsigned i = 0; i < n; ++i) {
    293             outputs[i] = getScalarField(mScalarOutputs[i].name);
     273            outputs[i] = idb->getScalarField(mScalarOutputs[i].name);
    294274        }
    295275        if (n == 1) {
    296             iBuilder->CreateRet(outputs[0]);
     276            idb->CreateRet(outputs[0]);
    297277        } else {
    298             iBuilder->CreateAggregateRet(outputs, n);
     278            idb->CreateAggregateRet(outputs, n);
    299279        }
    300280    }
     
    302282
    303283unsigned Kernel::getScalarIndex(const std::string & name) const {
    304     assert ("getScalarIndex was given a null IDISA Builder" && iBuilder);
    305284    const auto f = mKernelMap.find(name);
    306285    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
     286        assert (false);
    307287        report_fatal_error(getName() + " does not contain scalar: " + name);
    308288    }
     
    310290}
    311291
    312 Value * Kernel::getProducedItemCount(const std::string & name, Value * doFinal) const {
    313     Port port; unsigned ssIdx;
    314     std::tie(port, ssIdx) = getStreamPort(name);
    315     assert (port == Port::Output);
    316     if (mStreamSetOutputs[ssIdx].rate.isExact()) {
    317         std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
    318         std::string principalField;
    319         if (refSet.empty()) {
    320             if (mStreamSetInputs.empty()) {
    321                 principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
    322             } else {
    323                 principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
    324             }
    325         } else {
    326             Port port; unsigned pfIndex;
    327             std::tie(port, pfIndex) = getStreamPort(refSet);
    328             if (port == Port::Input) {
    329                principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
    330             } else {
    331                principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
    332             }
    333         }
    334         Value * principalItemsProcessed = getScalarField(principalField);
    335         return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
    336     }
    337     return getScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX);
    338 }
    339 
    340 llvm::Value * Kernel::getAvailableItemCount(const std::string & name) const {
    341     for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    342         if (mStreamSetInputs[i].name == name) {
    343             return mAvailableItemCount[i];
    344         }
    345     }
    346     return nullptr;
    347 }
    348 
    349 Value * Kernel::getProcessedItemCount(const std::string & name) const {
    350     Port port; unsigned ssIdx;
    351     std::tie(port, ssIdx) = getStreamPort(name);
    352     assert (port == Port::Input);
    353     if (mStreamSetInputs[ssIdx].rate.isExact()) {
    354         std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
    355         if (refSet.empty()) {
    356             refSet = mStreamSetInputs[0].name;
    357         }
    358         Value * principalItemsProcessed = getScalarField(refSet + PROCESSED_ITEM_COUNT_SUFFIX);
    359         return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
    360     }
    361     return getScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX);
    362 }
    363 
    364 Value * Kernel::getConsumedItemCount(const std::string & name) const {
    365     return getScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX);
    366 }
    367 
    368 void Kernel::setProducedItemCount(const std::string & name, Value * value) const {
    369     setScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX, value);
    370 }
    371 
    372 void Kernel::setProcessedItemCount(const std::string & name, Value * value) const {
    373     setScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX, value);
    374 }
    375 
    376 void Kernel::setConsumedItemCount(const std::string & name, Value * value) const {
    377     setScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX, value);
    378 }
    379 
    380 Value * Kernel::getTerminationSignal() const {
    381     return getScalarField(TERMINATION_SIGNAL);
    382 }
    383 
    384 void Kernel::setTerminationSignal() const {
    385     setScalarField(TERMINATION_SIGNAL, iBuilder->getTrue());
    386 }
    387 
    388 LoadInst * Kernel::acquireLogicalSegmentNo() const {
    389     assert (iBuilder);
    390     return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR));
    391 }
    392 
    393 void Kernel::releaseLogicalSegmentNo(Value * nextSegNo) const {
    394     iBuilder->CreateAtomicStoreRelease(nextSegNo, getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR));
    395 }
    396 
    397 llvm::Value * Kernel::getLinearlyAccessibleItems(const std::string & name, llvm::Value * fromPosition) const {
    398     const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
    399     return buf->getLinearlyAccessibleItems(iBuilder, fromPosition);
    400 }
    401 
    402 llvm::Value * Kernel::getConsumerLock(const std::string & name) const {
    403     return getScalarField(name + CONSUMER_SUFFIX);
    404 }
    405 
    406 void Kernel::setConsumerLock(const std::string & name, llvm::Value * value) const {
    407     setScalarField(name + CONSUMER_SUFFIX, value);
    408 }
    409 
    410 inline Value * Kernel::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
    411     for (const Binding & b : bindings) {
    412         if (b.name == name) {
    413             const auto divisor = iBuilder->getBitBlockWidth();
    414             if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
    415                 return iBuilder->CreateLShr(itemCount, std::log2(divisor));
    416             } else {
    417                 return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
    418             }
    419         }
    420     }
    421     report_fatal_error("Error: no binding in " + getName() + " for " + name);
    422 }
    423 
    424 Value * Kernel::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
    425     Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
    426     const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
    427     return buf->getStreamBlockPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
    428 }
    429 
    430 Value * Kernel::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
    431     return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
    432 }
    433 
    434 Value * Kernel::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
    435     Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
    436     const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
    437     return buf->getStreamPackPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
    438 }
    439 
    440 Value * Kernel::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
    441     return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
    442 }
    443 
    444 llvm::Value * Kernel::getInputStreamSetCount(const std::string & name) const {
    445     return getInputStreamSetBuffer(name)->getStreamSetCount(iBuilder, getStreamSetBufferPtr(name));
    446 }
    447 
    448 llvm::Value * Kernel::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
    449     Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
    450     blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
    451     const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
    452     return buf->getStreamBlockPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
    453 }
    454 
    455 Value * Kernel::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
    456     Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
    457     const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
    458     return buf->getStreamBlockPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
    459 }
    460 
    461 void Kernel::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
    462     return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
    463 }
    464 
    465 Value * Kernel::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
    466     Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
    467     const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
    468     return buf->getStreamPackPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
    469 }
    470 
    471 void Kernel::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
    472     return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
    473 }
    474 
    475 llvm::Value * Kernel::getOutputStreamSetCount(const std::string & name) const {
    476     return getOutputStreamSetBuffer(name)->getStreamSetCount(iBuilder, getStreamSetBufferPtr(name));
    477 }
    478 
    479 Value * Kernel::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
    480     return getInputStreamSetBuffer(name)->getRawItemPointer(iBuilder, getStreamSetBufferPtr(name), streamIndex, absolutePosition);
    481 }
    482 
    483 Value * Kernel::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
    484     return getOutputStreamSetBuffer(name)->getRawItemPointer(iBuilder, getStreamSetBufferPtr(name), streamIndex, absolutePosition);
    485 }
    486 
    487 Value * Kernel::getBaseAddress(const std::string & name) const {
    488     return getAnyStreamSetBuffer(name)->getBaseAddress(iBuilder, getStreamSetBufferPtr(name));
    489 }
    490 
    491 void Kernel::setBaseAddress(const std::string & name, Value * const addr) const {
    492     return getAnyStreamSetBuffer(name)->setBaseAddress(iBuilder, getStreamSetBufferPtr(name), addr);
    493 }
    494 
    495 Value * Kernel::getBufferedSize(const std::string & name) const {
    496     return getAnyStreamSetBuffer(name)->getBufferedSize(iBuilder, getStreamSetBufferPtr(name));
    497 }
    498 
    499 void Kernel::setBufferedSize(const std::string & name, Value * size) const {
    500     unsigned index; Port port;
    501     std::tie(port, index) = getStreamPort(name);
    502     const StreamSetBuffer * buf = nullptr;
    503     if (port == Port::Input) {
    504         assert (index < mStreamSetInputBuffers.size());
    505         buf = mStreamSetInputBuffers[index];
    506     } else {
    507         assert (index < mStreamSetOutputBuffers.size());
    508         buf = mStreamSetOutputBuffers[index];
    509     }
    510     buf->setBufferedSize(iBuilder, getStreamSetBufferPtr(name), size);
    511 }
    512 
    513 BasicBlock * Kernel::CreateWaitForConsumers() const {
    514 
    515     const auto consumers = getStreamOutputs();
    516     BasicBlock * const entry = iBuilder->GetInsertBlock();
    517     if (consumers.empty()) {
    518         return entry;
    519     } else {
    520         Function * const parent = entry->getParent();
    521         IntegerType * const sizeTy = iBuilder->getSizeTy();
    522         ConstantInt * const zero = iBuilder->getInt32(0);
    523         ConstantInt * const one = iBuilder->getInt32(1);
    524         ConstantInt * const size0 = iBuilder->getSize(0);
    525 
    526         Value * const segNo = acquireLogicalSegmentNo();
    527         const auto n = consumers.size();
    528         BasicBlock * load[n + 1];
    529         BasicBlock * wait[n];
    530         for (unsigned i = 0; i < n; ++i) {
    531             load[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Load", parent);
    532             wait[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Wait", parent);
    533         }
    534         load[n] = BasicBlock::Create(iBuilder->getContext(), "Resume", parent);
    535         iBuilder->CreateBr(load[0]);
    536         for (unsigned i = 0; i < n; ++i) {
    537 
    538             iBuilder->SetInsertPoint(load[i]);
    539             Value * const outputConsumers = getConsumerLock(consumers[i].name);
    540 
    541             Value * const consumerCount = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, zero}));
    542             Value * const consumerPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, one}));
    543             Value * const noConsumers = iBuilder->CreateICmpEQ(consumerCount, size0);
    544             iBuilder->CreateUnlikelyCondBr(noConsumers, load[i + 1], wait[i]);
    545 
    546             iBuilder->SetInsertPoint(wait[i]);
    547             PHINode * const consumerPhi = iBuilder->CreatePHI(sizeTy, 2);
    548             consumerPhi->addIncoming(size0, load[i]);
    549 
    550             Value * const conSegPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(consumerPtr, consumerPhi));
    551             Value * const processedSegmentCount = iBuilder->CreateAtomicLoadAcquire(conSegPtr);
    552             Value * const ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    553             assert (ready->getType() == iBuilder->getInt1Ty());
    554             Value * const nextConsumerIdx = iBuilder->CreateAdd(consumerPhi, iBuilder->CreateZExt(ready, sizeTy));
    555             consumerPhi->addIncoming(nextConsumerIdx, wait[i]);
    556             Value * const next = iBuilder->CreateICmpEQ(nextConsumerIdx, consumerCount);
    557             iBuilder->CreateCondBr(next, load[i + 1], wait[i]);
    558         }
    559 
    560         BasicBlock * const exit = load[n];
    561         iBuilder->SetInsertPoint(exit);
    562         return exit;
    563     }
    564 
    565 }
    566 
    567 Value * Kernel::getStreamSetBufferPtr(const std::string & name) const {
    568     return getScalarField(name + BUFFER_PTR_SUFFIX);
    569 }
    570 
    571 //Argument * Kernel::getParameter(Function * const f, const std::string & name) const {
    572 //    for (auto & arg : f->getArgumentList()) {
    573 //        if (arg.getName().equals(name)) {
    574 //            return &arg;
    575 //        }
    576 //    }
    577 //    report_fatal_error(getName() + " does not have parameter " + name);
    578 //}
    579 
    580 CallInst * Kernel::createDoSegmentCall(const std::vector<Value *> & args) const {
    581     Function * const doSegment = getDoSegmentFunction(iBuilder->getModule());
    582     assert (doSegment->getArgumentList().size() == args.size());
    583     return iBuilder->CreateCall(doSegment, args);
    584 }
    585 
    586 Value * Kernel::getAccumulator(const std::string & accumName) const {
    587     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
    588     if (LLVM_UNLIKELY(mOutputScalarResult == nullptr)) {
    589         report_fatal_error("Cannot get accumulator " + accumName + " until " + getName() + " has terminated.");
    590     }
    591     const auto n = mScalarOutputs.size();
    592     if (LLVM_UNLIKELY(n == 0)) {
    593         report_fatal_error(getName() + " has no output scalars.");
    594     } else {
    595         for (unsigned i = 0; i < n; ++i) {
    596             const Binding & b = mScalarOutputs[i];
    597             if (b.name == accumName) {
    598                 if (n == 1) {
    599                     return mOutputScalarResult;
    600                 } else {
    601                     return iBuilder->CreateExtractValue(mOutputScalarResult, {i});
    602                 }
    603             }
    604         }
    605         report_fatal_error(getName() + " has no output scalar named " + accumName);
    606     }
    607 }
    608 
    609 BasicBlock * Kernel::CreateBasicBlock(std::string && name) const {
    610     return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
    611 }
    612 
    613 Value * Kernel::createInstance() {
    614     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     292Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & idb) {
     293    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    615294    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    616295        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
    617296    }
    618     setInstance(iBuilder->CreateCacheAlignedAlloca(mKernelStateType));
     297    setInstance(idb->CreateCacheAlignedAlloca(mKernelStateType));
    619298    return getInstance();
    620299}
    621300
    622 void Kernel::initializeInstance() {
    623     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     301void Kernel::initializeInstance(const std::unique_ptr<KernelBuilder> & idb) {
     302    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    624303    if (LLVM_UNLIKELY(getInstance() == nullptr)) {
    625304        report_fatal_error("Cannot initialize " + getName() + " before calling createInstance()");
     
    656335    }
    657336    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
    658     IntegerType * const sizeTy = iBuilder->getSizeTy();
     337    IntegerType * const sizeTy = idb->getSizeTy();
    659338    PointerType * const sizePtrTy = sizeTy->getPointerTo();
    660339    PointerType * const sizePtrPtrTy = sizePtrTy->getPointerTo();
     
    664343        const auto & consumers = output->getConsumers();
    665344        const auto n = consumers.size();
    666         AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
    667         Value * const consumerSegNoArray = iBuilder->CreateAlloca(ArrayType::get(sizePtrTy, n));
     345        AllocaInst * const outputConsumers = idb->CreateAlloca(consumerTy);
     346        Value * const consumerSegNoArray = idb->CreateAlloca(ArrayType::get(sizePtrTy, n));
    668347        for (unsigned i = 0; i < n; ++i) {
    669348            Kernel * const consumer = consumers[i];
    670349            assert ("all instances must be created prior to initialization of any instance" && consumer->getInstance());
    671             Value * const segmentNoPtr = consumer->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
    672             iBuilder->CreateStore(segmentNoPtr, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
    673         }
    674         Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    675         iBuilder->CreateStore(iBuilder->getSize(n), consumerCountPtr);
    676         Value * const consumerSegNoArrayPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    677         iBuilder->CreateStore(iBuilder->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
     350            idb->setKernel(consumer);
     351            Value * const segmentNoPtr = idb->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
     352            idb->CreateStore(segmentNoPtr, idb->CreateGEP(consumerSegNoArray, { idb->getInt32(0), idb->getInt32(i) }));
     353        }
     354        idb->setKernel(this);
     355        Value * const consumerCountPtr = idb->CreateGEP(outputConsumers, {idb->getInt32(0), idb->getInt32(0)});
     356        idb->CreateStore(idb->getSize(n), consumerCountPtr);
     357        Value * const consumerSegNoArrayPtr = idb->CreateGEP(outputConsumers, {idb->getInt32(0), idb->getInt32(1)});
     358        idb->CreateStore(idb->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
    678359        args.push_back(outputConsumers);
    679360    }
    680 
    681     iBuilder->CreateCall(getInitFunction(iBuilder->getModule()), args);
     361    idb->CreateCall(getInitFunction(idb->getModule()), args);
    682362}
    683363
     
    685365//  each block of the given number of blocksToDo, and then updates counts.
    686366
    687 void BlockOrientedKernel::generateDoSegmentMethod() {   
    688     BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
    689     BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
    690     mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
    691     BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
    692     BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
    693     BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
     367void BlockOrientedKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) {
     368    BasicBlock * const entryBlock = idb->GetInsertBlock();
     369    BasicBlock * const strideLoopCond = idb->CreateBasicBlock(getName() + "_strideLoopCond");
     370    mStrideLoopBody = idb->CreateBasicBlock(getName() + "_strideLoopBody");
     371    BasicBlock * const stridesDone = idb->CreateBasicBlock(getName() + "_stridesDone");
     372    BasicBlock * const doFinalBlock = idb->CreateBasicBlock(getName() + "_doFinalBlock");
     373    BasicBlock * const segmentDone = idb->CreateBasicBlock(getName() + "_segmentDone");
    694374
    695375    Value * baseTarget = nullptr;
    696     if (useIndirectBr()) {
    697         baseTarget = iBuilder->CreateSelect(mIsFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
    698     }
    699 
    700     ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
     376    if (idb->supportsIndirectBr()) {
     377        baseTarget = idb->CreateSelect(mIsFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
     378    }
     379
     380    ConstantInt * stride = idb->getSize(idb->getStride());
    701381    Value * availablePos = mAvailableItemCount[0];
    702     Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
    703     Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
    704     Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
    705 
    706     iBuilder->CreateBr(strideLoopCond);
    707 
    708     iBuilder->SetInsertPoint(strideLoopCond);
     382    Value * processed = idb->getProcessedItemCount(mStreamSetInputs[0].name);
     383    Value * itemsAvail = idb->CreateSub(availablePos, processed);
     384    Value * stridesToDo = idb->CreateUDiv(itemsAvail, stride);
     385
     386    idb->CreateBr(strideLoopCond);
     387
     388    idb->SetInsertPoint(strideLoopCond);
    709389
    710390    PHINode * branchTarget = nullptr;
    711     if (useIndirectBr()) {
    712         branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
     391    if (idb->supportsIndirectBr()) {
     392        branchTarget = idb->CreatePHI(baseTarget->getType(), 2, "branchTarget");
    713393        branchTarget->addIncoming(baseTarget, entryBlock);
    714394    }
    715395
    716     PHINode * const stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
     396    PHINode * const stridesRemaining = idb->CreatePHI(idb->getSizeTy(), 2, "stridesRemaining");
    717397    stridesRemaining->addIncoming(stridesToDo, entryBlock);
    718398    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
    719399    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
    720     Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
    721     iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
    722 
    723     iBuilder->SetInsertPoint(mStrideLoopBody);
    724 
    725     if (useIndirectBr()) {
    726         mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
     400    Value * notDone = idb->CreateICmpSGT(stridesRemaining, idb->getSize(0));
     401    idb->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
     402
     403    idb->SetInsertPoint(mStrideLoopBody);
     404
     405    if (idb->supportsIndirectBr()) {
     406        mStrideLoopTarget = idb->CreatePHI(baseTarget->getType(), 2, "strideTarget");
    727407        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
    728408    }
     
    730410    /// GENERATE DO BLOCK METHOD
    731411
    732     writeDoBlockMethod();
     412    writeDoBlockMethod(idb);
    733413
    734414    /// UPDATE PROCESSED COUNTS
    735415
    736     processed = getProcessedItemCount(mStreamSetInputs[0].name);
    737     Value * itemsDone = iBuilder->CreateAdd(processed, stride);
    738     setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
    739 
    740     stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
    741 
    742     BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
    743     if (useIndirectBr()) {
     416    processed = idb->getProcessedItemCount(mStreamSetInputs[0].name);
     417    Value * itemsDone = idb->CreateAdd(processed, stride);
     418    idb->setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
     419
     420    stridesRemaining->addIncoming(idb->CreateSub(stridesRemaining, idb->getSize(1)), idb->GetInsertBlock());
     421
     422    BasicBlock * bodyEnd = idb->GetInsertBlock();
     423    if (idb->supportsIndirectBr()) {
    744424        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
    745425    }
    746     iBuilder->CreateBr(strideLoopCond);
     426    idb->CreateBr(strideLoopCond);
    747427
    748428    stridesDone->moveAfter(bodyEnd);
    749429
    750     iBuilder->SetInsertPoint(stridesDone);
     430    idb->SetInsertPoint(stridesDone);
    751431
    752432    // Now conditionally perform the final block processing depending on the doFinal parameter.
    753     if (useIndirectBr()) {
    754         mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
     433    if (idb->supportsIndirectBr()) {
     434        mStrideLoopBranch = idb->CreateIndirectBr(branchTarget, 3);
    755435        mStrideLoopBranch->addDestination(doFinalBlock);
    756436        mStrideLoopBranch->addDestination(segmentDone);
    757437    } else {
    758         iBuilder->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
     438        idb->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
    759439    }
    760440
    761441    doFinalBlock->moveAfter(stridesDone);
    762442
    763     iBuilder->SetInsertPoint(doFinalBlock);
    764 
    765     Value * remainingItems = iBuilder->CreateSub(mAvailableItemCount[0], getProcessedItemCount(mStreamSetInputs[0].name));
    766     writeFinalBlockMethod(remainingItems);
     443    idb->SetInsertPoint(doFinalBlock);
     444
     445    Value * remainingItems = idb->CreateSub(mAvailableItemCount[0], idb->getProcessedItemCount(mStreamSetInputs[0].name));
     446
     447    writeFinalBlockMethod(idb, remainingItems);
    767448
    768449    itemsDone = mAvailableItemCount[0];
    769     setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
    770     setTerminationSignal();
    771     iBuilder->CreateBr(segmentDone);
    772 
    773     segmentDone->moveAfter(iBuilder->GetInsertBlock());
    774 
    775     iBuilder->SetInsertPoint(segmentDone);
     450    idb->setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
     451    idb->setTerminationSignal();
     452    idb->CreateBr(segmentDone);
     453
     454    segmentDone->moveAfter(idb->GetInsertBlock());
     455
     456    idb->SetInsertPoint(segmentDone);
    776457
    777458    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
    778     if (useIndirectBr()) {
    779         MDBuilder mdb(iBuilder->getContext());
     459    if (idb->supportsIndirectBr()) {
     460        MDBuilder mdb(idb->getContext());
    780461        const auto destinations = mStrideLoopBranch->getNumDestinations();
    781462        uint32_t weights[destinations];
     
    789470}
    790471
    791 inline void BlockOrientedKernel::writeDoBlockMethod() {
     472inline void BlockOrientedKernel::writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) {
    792473
    793474    Value * const self = getInstance();
    794475    Function * const cp = mCurrentMethod;
    795     auto ip = iBuilder->saveIP();
     476    auto ip = idb->saveIP();
    796477
    797478    /// Check if the do block method is called and create the function if necessary   
    798     if (!useIndirectBr()) {
    799         FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType()}, false);
    800         mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
     479    if (!idb->supportsIndirectBr()) {
     480        FunctionType * const type = FunctionType::get(idb->getVoidTy(), {self->getType()}, false);
     481        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, idb->getModule());
    801482        mCurrentMethod->setCallingConv(CallingConv::C);
    802483        mCurrentMethod->setDoesNotThrow();
     
    805486        args->setName("self");
    806487        setInstance(&*args);
    807         iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     488        idb->SetInsertPoint(idb->CreateBasicBlock("entry"));
    808489    }
    809490
     
    811492    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    812493        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    813             priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
    814         }
    815     }
    816 
    817     generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
    818 
     494            priorProduced.push_back(idb->getProducedItemCount(mStreamSetOutputs[i].name));
     495        }
     496    }
     497
     498    generateDoBlockMethod(idb); // must be implemented by the BlockOrientedKernelBuilder subtype
     499
     500    unsigned priorIdx = 0;
     501    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     502        Value * log2BlockSize = idb->getSize(std::log2(idb->getBitBlockWidth()));
     503        if (SwizzledCopybackBuffer * const cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
     504            BasicBlock * copyBack = idb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     505            BasicBlock * done = idb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     506            Value * newlyProduced = idb->CreateSub(idb->getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
     507            Value * priorBlock = idb->CreateLShr(priorProduced[priorIdx], log2BlockSize);
     508            Value * priorOffset = idb->CreateAnd(priorProduced[priorIdx], idb->getSize(idb->getBitBlockWidth() - 1));
     509            Value * instance = idb->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     510            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(idb.get(), priorBlock);
     511            Value * accessible = idb->CreateSub(idb->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
     512            Value * wraparound = idb->CreateICmpULT(accessible, newlyProduced);
     513            idb->CreateCondBr(wraparound, copyBack, done);
     514            idb->SetInsertPoint(copyBack);
     515            Value * copyItems = idb->CreateSub(newlyProduced, accessible);
     516            cb->createCopyBack(idb.get(), instance, copyItems);
     517            idb->CreateBr(done);
     518            idb->SetInsertPoint(done);
     519            priorIdx++;
     520        }
     521        if (CircularCopybackBuffer * const cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
     522            BasicBlock * copyBack = idb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     523            BasicBlock * done = idb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     524            Value * instance = idb->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     525            Value * newlyProduced = idb->CreateSub(idb->getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
     526            Value * accessible = cb->getLinearlyAccessibleItems(idb.get(), priorProduced[priorIdx]);
     527            Value * wraparound = idb->CreateICmpULT(accessible, newlyProduced);
     528            idb->CreateCondBr(wraparound, copyBack, done);
     529            idb->SetInsertPoint(copyBack);
     530            Value * copyItems = idb->CreateSub(newlyProduced, accessible);
     531            cb->createCopyBack(idb.get(), instance, copyItems);
     532            idb->CreateBr(done);
     533            idb->SetInsertPoint(done);
     534            priorIdx++;
     535        }
     536    }
     537
     538
     539    /// Call the do block method if necessary, then restore the current function state to the do segment method
     540    if (!idb->supportsIndirectBr()) {
     541        idb->CreateRetVoid();
     542        mDoBlockMethod = mCurrentMethod;
     543        idb->restoreIP(ip);
     544        idb->CreateCall(mCurrentMethod, self);
     545        setInstance(self);
     546        mCurrentMethod = cp;
     547    }
     548
     549}
     550
     551inline void BlockOrientedKernel::writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, Value * remainingItems) {
     552
     553    Value * const self = getInstance();
     554    Function * const cp = mCurrentMethod;
     555    Value * const remainingItemCount = remainingItems;
     556    auto ip = idb->saveIP();
     557
     558    if (!idb->supportsIndirectBr()) {
     559        FunctionType * const type = FunctionType::get(idb->getVoidTy(), {self->getType(), idb->getSizeTy()}, false);
     560        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, idb->getModule());
     561        mCurrentMethod->setCallingConv(CallingConv::C);
     562        mCurrentMethod->setDoesNotThrow();
     563        mCurrentMethod->setDoesNotCapture(1);
     564        auto args = mCurrentMethod->arg_begin();
     565        args->setName("self");
     566        setInstance(&*args);
     567        remainingItems = &*(++args);
     568        remainingItems->setName("remainingItems");
     569        idb->SetInsertPoint(idb->CreateBasicBlock("entry"));
     570    }
     571
     572    generateFinalBlockMethod(idb, remainingItems); // may be implemented by the BlockOrientedKernel subtype
     573
     574    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
     575
     576    if (!idb->supportsIndirectBr()) {
     577        idb->CreateRetVoid();
     578        idb->restoreIP(ip);
     579        idb->CreateCall(mCurrentMethod, {self, remainingItemCount});
     580        mCurrentMethod = cp;
     581        setInstance(self);
     582    }
     583
     584}
     585
     586//  The default finalBlock method simply dispatches to the doBlock routine.
     587void BlockOrientedKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, Value * /* remainingItems */) {
     588    CreateDoBlockMethodCall(idb);
     589}
     590
     591void BlockOrientedKernel::CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb) {
     592    if (idb->supportsIndirectBr()) {
     593        BasicBlock * bb = idb->CreateBasicBlock("resume");
     594        mStrideLoopBranch->addDestination(bb);
     595        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), idb->GetInsertBlock());
     596        idb->CreateBr(mStrideLoopBody);
     597        bb->moveAfter(idb->GetInsertBlock());
     598        idb->SetInsertPoint(bb);
     599    } else {
     600        idb->CreateCall(mDoBlockMethod, getInstance());
     601    }
     602}
     603
     604void MultiBlockKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) {
     605
     606    // First prepare the multi-block method that will be used.
     607    KernelBuilder * const iBuilder = kb.get();
     608
     609    std::vector<Type *> multiBlockParmTypes;
     610    multiBlockParmTypes.push_back(mKernelStateType->getPointerTo());
     611    for (auto buffer : mStreamSetInputBuffers) {
     612        multiBlockParmTypes.push_back(buffer->getPointerType());
     613    }
     614    for (auto buffer : mStreamSetOutputBuffers) {
     615        multiBlockParmTypes.push_back(buffer->getPointerType());
     616    }
     617    FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), multiBlockParmTypes, false);
     618    Function * multiBlockFunction = Function::Create(type, GlobalValue::InternalLinkage, getName() + MULTI_BLOCK_SUFFIX, iBuilder->getModule());
     619    multiBlockFunction->setCallingConv(CallingConv::C);
     620    multiBlockFunction->setDoesNotThrow();
     621    auto args = multiBlockFunction->arg_begin();
     622    args->setName("self");
     623    for (auto binding : mStreamSetInputs) {
     624        (++args)->setName(binding.name + "BufPtr");
     625    }
     626    for (auto binding : mStreamSetOutputs) {
     627        (args++)->setName(binding.name + "BufPtr");
     628    }
     629
     630    // Now use the generateMultiBlockLogic method of the MultiBlockKernel subtype to
     631    // provide the required multi-block kernel logic.
     632    auto ip = iBuilder->saveIP();
     633    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "multiBlockEntry", multiBlockFunction, 0));
     634
     635    generateMultiBlockLogic();
     636
     637    iBuilder->CreateRetVoid();
     638    iBuilder->restoreIP(ip);
     639
     640    // Now proceed with creation of the doSegment method.
     641
     642    BasicBlock * const entry = iBuilder->GetInsertBlock();
     643    BasicBlock * const doSegmentOuterLoop = iBuilder->CreateBasicBlock(getName() + "_doSegmentOuterLoop");
     644    BasicBlock * const doMultiBlockCall = iBuilder->CreateBasicBlock(getName() + "_doMultiBlockCall");
     645    BasicBlock * const finalBlockCheck = iBuilder->CreateBasicBlock(getName() + "_finalBlockCheck");
     646    BasicBlock * const doTempBufferBlock = iBuilder->CreateBasicBlock(getName() + "_doTempBufferBlock");
     647    BasicBlock * const segmentDone = iBuilder->CreateBasicBlock(getName() + "_segmentDone");
     648
     649    Value * blockBaseMask = iBuilder->CreateNot(iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
     650
     651    //
     652    //  A. Temporary Buffer Area Determination
     653    //
     654    // For final block processing and for processing near the end of physical buffer
     655    // boundaries, we need to allocate temporary space for processing a full block of input.
     656    // Compute the size requirements to store stream set data at the declared processing
     657    // rates in reference to one block of the principal input stream.
     658    //
     659
     660    unsigned bitBlockWidth = iBuilder->getBitBlockWidth();
     661    std::vector<Type *> tempBuffers;
     662    std::vector<unsigned> itemsPerPrincipalBlock;
     663    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     664        auto & rate = mStreamSetInputs[i].rate;
     665        std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
     666        if (refSet.empty()) {
     667            itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
     668        }
     669        else {
     670            Port port; unsigned ssIdx;
     671            std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
     672            assert (port == Port::Input && ssIdx < i);
     673            itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
     674        }
     675        unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
     676        if (blocks > 1) {
     677            tempBuffers.push_back(ArrayType::get(mStreamSetInputBuffers[i]->getType(), blocks));
     678        }
     679        else {
     680            tempBuffers.push_back(mStreamSetInputBuffers[i]->getType());
     681        }
     682    }
     683    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     684        auto & rate = mStreamSetOutputs[i].rate;
     685        std::string refSet = mStreamSetOutputs[i].rate.referenceStreamSet();
     686        if (refSet.empty()) {
     687            itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
     688        }
     689        else {
     690            Port port; unsigned ssIdx;
     691            std::tie(port, ssIdx) = getStreamPort(mStreamSetOutputs[i].name);
     692            if (port == Port::Output) ssIdx += mStreamSetInputs.size();
     693            itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
     694        }
     695        unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
     696        if (blocks > 1) {
     697            tempBuffers.push_back(ArrayType::get(mStreamSetOutputBuffers[i]->getType(), blocks));
     698        }
     699        else {
     700            tempBuffers.push_back(mStreamSetOutputBuffers[i]->getType());
     701        }
     702    }
     703    Type * tempParameterStructType = StructType::create(iBuilder->getContext(), tempBuffers);
     704    Value * tempParameterArea = iBuilder->CreateCacheAlignedAlloca(tempParameterStructType);
     705
     706    ConstantInt * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
     707    Value * availablePos = mAvailableItemCount[0];
     708    Value * itemsAvail = availablePos;
     709    //  Make sure that corresponding data is available depending on processing rate
     710    //  for all input stream sets.
     711    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
     712        Value * a = mAvailableItemCount[i];
     713        auto & rate = mStreamSetInputs[i].rate;
     714        assert (((rate.referenceStreamSet().empty()) || (rate.referenceStreamSet() == mStreamSetInputs[0].name)) && "Multiblock kernel input rate not with respect to principal stream.");
     715        Value * maxItems = rate.CreateMaxReferenceItemsCalculation(iBuilder, a);
     716        itemsAvail = iBuilder->CreateSelect(iBuilder->CreateICmpULT(itemsAvail, maxItems), itemsAvail, maxItems);
     717    }
     718
     719    Value * processed = iBuilder->getProcessedItemCount(mStreamSetInputs[0].name);
     720    Value * itemsToDo = iBuilder->CreateSub(itemsAvail, processed);
     721    Value * fullBlocksToDo = iBuilder->CreateUDiv(itemsToDo, blockSize);
     722    Value * excessItems = iBuilder->CreateURem(itemsToDo, blockSize);
     723
     724    //  Now we iteratively process these blocks using the doMultiBlock method.
     725    //  In each iteration, we process the maximum number of linearly accessible
     726    //  blocks on the principal input, reduced to ensure that the corresponding
     727    //  data is linearly available at the specified processing rates for the other inputs,
     728    //  and that each of the output buffers has sufficient linearly available space
     729    //  (using overflow areas, if necessary) for the maximum output that can be
     730    //  produced.
     731
     732    //iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(fullBlocksToDo, iBuilder->getSize(0)), doSegmentOuterLoop, finalBlockCheck);
     733    iBuilder->CreateBr(doSegmentOuterLoop);
     734
     735    iBuilder->SetInsertPoint(doSegmentOuterLoop);
     736    PHINode * const blocksRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "blocksRemaining");
     737    blocksRemaining->addIncoming(fullBlocksToDo, entry);
     738
     739
     740    // For each input buffer, determine the processedItemCount, the block pointer for the
     741    // buffer block containing the next item, and the number of linearly available items.
     742    //
     743    std::vector<Value *> processedItemCount;
     744    std::vector<Value *> inputBlockPtr;
     745    std::vector<Value *> producedItemCount;
     746    std::vector<Value *> outputBlockPtr;
     747
     748    //  Calculate linearly available blocks for all input stream sets.
     749    Value * linearlyAvailBlocks = nullptr;
     750    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     751        Value * p = iBuilder->getProcessedItemCount(mStreamSetInputs[i].name);
     752        Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
     753        Value * b = iBuilder->getInputStreamBlockPtr(mStreamSetInputs[i].name, iBuilder->getInt32(0));
     754        processedItemCount.push_back(p);
     755        inputBlockPtr.push_back(b);
     756        auto & rate = mStreamSetInputs[i].rate;
     757        Value * blocks = nullptr;
     758        if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator()) && (rate.referenceStreamSet() == "")) {
     759            blocks = mStreamSetInputBuffers[i]->getLinearlyAccessibleBlocks(iBuilder, blkNo);
     760        } else {
     761            Value * linearlyAvailItems = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(iBuilder, p);
     762            Value * items = rate.CreateMaxReferenceItemsCalculation(iBuilder, linearlyAvailItems);
     763            blocks = iBuilder->CreateUDiv(items, blockSize);
     764        }
     765        if (i == 0) {
     766            linearlyAvailBlocks = blocks;
     767        } else {
     768            linearlyAvailBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
     769        }
     770    }
     771
     772    //  Now determine the linearly writeable blocks, based on available blocks reduced
     773    //  by limitations of output buffer space.
     774    Value * linearlyWritableBlocks = linearlyAvailBlocks;
     775
     776    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     777        Value * p = iBuilder->getProducedItemCount(mStreamSetOutputs[i].name);
     778        Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
     779        Value * b = iBuilder->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, iBuilder->getInt32(0));
     780        producedItemCount.push_back(p);
     781        outputBlockPtr.push_back(b);
     782        auto & rate = mStreamSetOutputs[i].rate;
     783        Value * blocks = nullptr;
     784        if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator())) {
     785            blocks = mStreamSetOutputBuffers[0]->getLinearlyWritableBlocks(iBuilder, blkNo);
     786        } else {
     787            Value * writableItems = mStreamSetOutputBuffers[0]->getLinearlyWritableItems(iBuilder, p);
     788            blocks = iBuilder->CreateUDiv(writableItems, blockSize);
     789        }
     790        linearlyWritableBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyWritableBlocks), blocks, linearlyWritableBlocks);
     791    }
     792    Value * haveBlocks = iBuilder->CreateICmpUGT(linearlyWritableBlocks, iBuilder->getSize(0));
     793
     794    iBuilder->CreateCondBr(haveBlocks, doMultiBlockCall, doTempBufferBlock);
     795
     796    //  At this point we have verified the availability of one or more blocks of input data and output buffer space for all stream sets.
     797    //  Now prepare the doMultiBlock call.
     798    iBuilder->SetInsertPoint(doMultiBlockCall);
     799
     800    Value * linearlyAvailItems = iBuilder->CreateMul(linearlyWritableBlocks, blockSize);
     801
     802    std::vector<Value *> doMultiBlockArgs;
     803    doMultiBlockArgs.push_back(linearlyAvailItems);
     804    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     805        doMultiBlockArgs.push_back(iBuilder->getRawInputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]));
     806    }
     807    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     808        doMultiBlockArgs.push_back(iBuilder->getRawOutputPointer(mStreamSetOutputs[i].name, iBuilder->getInt32(0), producedItemCount[i]));
     809    }
     810
     811    iBuilder->CreateCall(multiBlockFunction, doMultiBlockArgs);
     812
     813    // Do copybacks if necessary.
    819814    unsigned priorIdx = 0;
    820815    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    821816        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
    822         if (SwizzledCopybackBuffer * const cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    823             BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    824             BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    825             Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
    826             Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
    827             Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
    828             Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     817        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
     818            BasicBlock * copyBack = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     819            BasicBlock * done = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     820            Value * newlyProduced = iBuilder->CreateSub(iBuilder->getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
     821            Value * priorBlock = iBuilder->CreateLShr(producedItemCount[i], log2BlockSize);
     822            Value * priorOffset = iBuilder->CreateAnd(producedItemCount[i], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
     823            Value * instance = iBuilder->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    829824            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(iBuilder, priorBlock);
    830825            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
     
    838833            priorIdx++;
    839834        }
    840         if (CircularCopybackBuffer * const cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    841             BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    842             BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    843             Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    844             Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
    845             Value * accessible = cb->getLinearlyAccessibleItems(iBuilder, priorProduced[priorIdx]);
    846             Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
    847             iBuilder->CreateCondBr(wraparound, copyBack, done);
    848             iBuilder->SetInsertPoint(copyBack);
    849             Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
    850             cb->createCopyBack(iBuilder, instance, copyItems);
    851             iBuilder->CreateBr(done);
    852             iBuilder->SetInsertPoint(done);
    853             priorIdx++;
    854         }
    855     }
    856 
    857 
    858     /// Call the do block method if necessary then restore the current function state to the do segment method
    859     if (!useIndirectBr()) {
    860         iBuilder->CreateRetVoid();
    861         mDoBlockMethod = mCurrentMethod;
    862         iBuilder->restoreIP(ip);
    863         iBuilder->CreateCall(mCurrentMethod, self);
    864         setInstance(self);
    865         mCurrentMethod = cp;
    866     }
    867 
    868 }
    869 
    // Emits the IR for this kernel's final-block handling.
    //
    // When useIndirectBr() is false, a separate internal function named
    // getName() + FINAL_BLOCK_SUFFIX taking (self, remainingItems) is created, the
    // final-block logic is generated inside it, and a call to it is emitted at the
    // insertion point that was active on entry. When useIndirectBr() is true, no
    // function is created and generateFinalBlockMethod() emits its code at the
    // current insertion point.
    //
    // remainingItems: value passed on to generateFinalBlockMethod(); in the
    //                 function-based path it is rebound to the new function's
    //                 second argument before that call.
    870 inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
    871 
            // Snapshot the state that the function-based path overwrites so that it
            // can be restored (and used for the call arguments) afterwards.
    872     Value * const self = getInstance();
    873     Function * const cp = mCurrentMethod;
    874     Value * const remainingItemCount = remainingItems;
    875     auto ip = iBuilder->saveIP();
    876 
    877     if (!useIndirectBr()) {
                // Signature: void (typeof(self), size_t)
    878         FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType(), iBuilder->getSizeTy()}, false);
    879         mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
    880         mCurrentMethod->setCallingConv(CallingConv::C);
    881         mCurrentMethod->setDoesNotThrow();
    882         mCurrentMethod->setDoesNotCapture(1);
    883         auto args = mCurrentMethod->arg_begin();
    884         args->setName("self");
                // From here on, instance and remainingItems refer to the new
                // function's own arguments rather than the caller's values.
    885         setInstance(&*args);
    886         remainingItems = &*(++args);
    887         remainingItems->setName("remainingItems");
                // NOTE(review): CreateBasicBlock presumably places the block in
                // mCurrentMethod, i.e. the function just created — confirm.
    888         iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    889     }
    890 
    891     generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
    892 
    893     RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
    894 
    895     if (!useIndirectBr()) {
                // Terminate the helper function, return to the saved insertion point,
                // emit the call with the caller's original values, and restore the
                // kernel's current-method / instance state.
    896         iBuilder->CreateRetVoid();       
    897         iBuilder->restoreIP(ip);
    898         iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
    899         mCurrentMethod = cp;
    900         setInstance(self);
    901     }
    902 
    903 }
    904 
    905 //  The default finalBlock method simply dispatches to the doBlock routine.
    //  The remainingItems argument is ignored by this default implementation (its
    //  name is commented out); the only action is a call to CreateDoBlockMethodCall().
    906 void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
    907     CreateDoBlockMethodCall();
    908 }
    909 
    // Returns whatever iBuilder->supportsIndirectBr() reports. The other methods of
    // this class use the result to choose between indirect-branch dispatch and
    // separate do-block / final-block functions.
    910 bool BlockOrientedKernel::useIndirectBr() const {
    911     return iBuilder->supportsIndirectBr();
    912 }
    913 
    // Emits the IR that transfers control to the kernel's do-block logic.
    //
    // With indirect branches: registers a fresh "resume" block as a destination of
    // mStrideLoopBranch, records its block address as the incoming value of
    // mStrideLoopTarget for the current block, branches to mStrideLoopBody, and
    // continues emitting code in the "resume" block.
    // Without indirect branches: emits a direct call to mDoBlockMethod, passing the
    // kernel instance.
    914 void BlockOrientedKernel::CreateDoBlockMethodCall() {
    915     if (useIndirectBr()) {
    916         BasicBlock * bb = CreateBasicBlock("resume");
                // NOTE(review): mStrideLoopBranch / mStrideLoopTarget are declared
                // elsewhere; presumably an indirectbr instruction and the PHI node
                // that supplies its target address — confirm.
    917         mStrideLoopBranch->addDestination(bb);
    918         mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
    919         iBuilder->CreateBr(mStrideLoopBody);
                // Place the resume block directly after the block that branched away,
                // then make it the insertion point for subsequently generated code.
    920         bb->moveAfter(iBuilder->GetInsertBlock());
    921         iBuilder->SetInsertPoint(bb);
    922     } else {
    923         iBuilder->CreateCall(mDoBlockMethod, getInstance());
    924     }
    925 }
    926 
    // Emits a call to this kernel's terminate function (looked up via
    // getTerminateFunction for the builder's current module) on the kernel instance
    // and stores the call's result in mOutputScalarResult.
    // Asserts that iBuilder is non-null.
    927 void Kernel::finalizeInstance() {
    928     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
    929     mOutputScalarResult = iBuilder->CreateCall(getTerminateFunction(iBuilder->getModule()), { getInstance() });
    930 }
    931 
    // Looks up the stream set called `name` in mStreamMap and returns the StreamPort
    // stored for it. If no entry exists, report_fatal_error is invoked with a
    // message naming this kernel and the missing stream set.
    932 Kernel::StreamPort Kernel::getStreamPort(const std::string & name) const {
    933     const auto f = mStreamMap.find(name);
            // A missing name is treated as the unlikely path.
    934     if (LLVM_UNLIKELY(f == mStreamMap.end())) {
    935         report_fatal_error(getName() + " does not contain stream set " + name);
    936     }
    937     return f->second;
    938 }
    939 
    940    
    941 void MultiBlockKernel::generateDoSegmentMethod() {
    942    
    943     // First prepare the multi-block method that will be used.
    944    
    945     std::vector<Type *> multiBlockParmTypes;
    946     multiBlockParmTypes.push_back(mKernelStateType->getPointerTo());
    947     for (auto buffer : mStreamSetInputBuffers) {
    948         multiBlockParmTypes.push_back(buffer->getPointerType());
    949     }
    950     for (auto buffer : mStreamSetOutputBuffers) {
    951         multiBlockParmTypes.push_back(buffer->getPointerType());
    952     }
    953     FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), multiBlockParmTypes, false);
    954     Function * multiBlockFunction = Function::Create(type, GlobalValue::InternalLinkage, getName() + MULTI_BLOCK_SUFFIX, iBuilder->getModule());
    955     multiBlockFunction->setCallingConv(CallingConv::C);
    956     multiBlockFunction->setDoesNotThrow();
    957     auto args = multiBlockFunction->arg_begin();
    958     args->setName("self");
    959     for (auto binding : mStreamSetInputs) {
    960         (++args)->setName(binding.name + "BufPtr");
    961     }
    962     for (auto binding : mStreamSetOutputs) {
    963         (args++)->setName(binding.name + "BufPtr");
    964     }
    965    
    966     // Now use the generateMultiBlockLogic method of the MultiBlockKernelBuilder subtype to
    967     // provide the required multi-block kernel logic.
    968     auto ip = iBuilder->saveIP();
    969     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "multiBlockEntry", multiBlockFunction, 0));
    970     generateMultiBlockLogic();
    971     iBuilder->CreateRetVoid();
    972     iBuilder->restoreIP(ip);
    973    
    974     // Now proceed with creation of the doSegment method.
    975    
    976     BasicBlock * const entry = iBuilder->GetInsertBlock();
    977     BasicBlock * const doSegmentOuterLoop = CreateBasicBlock(getName() + "_doSegmentOuterLoop");
    978     BasicBlock * const doMultiBlockCall = CreateBasicBlock(getName() + "_doMultiBlockCall");
    979     BasicBlock * const finalBlockCheck = CreateBasicBlock(getName() + "_finalBlockCheck");
    980     BasicBlock * const doTempBufferBlock = CreateBasicBlock(getName() + "_doTempBufferBlock");
    981     BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
    982    
    983     Value * blockBaseMask = iBuilder->CreateNot(iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
    984    
    985     //
    986     //  A. Temporary Buffer Area Determination
    987     //
    988     // For final block processing and for processing near the end of physical buffer
    989     // boundaries, we need to allocate temporary space for processing a full block of input.
    990     // Compute the size requirements to store stream set data at the declared processing
    991     // rates in reference to one block of the principal input stream. 
    992     //
    993 
    994     unsigned bitBlockWidth = iBuilder->getBitBlockWidth();
    995     std::vector<Type *> tempBuffers;
    996     std::vector<unsigned> itemsPerPrincipalBlock;
    997     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    998         auto & rate = mStreamSetInputs[i].rate;
    999         std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
    1000         if (refSet.empty()) {
    1001             itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
    1002         }
    1003         else {
    1004             Port port; unsigned ssIdx;
    1005             std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
    1006             assert (port == Port::Input && ssIdx < i);
    1007             itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
    1008         }
    1009         unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
    1010         if (blocks > 1) {
    1011             tempBuffers.push_back(ArrayType::get(mStreamSetInputBuffers[i]->getType(), blocks));
    1012         }
    1013         else {
    1014             tempBuffers.push_back(mStreamSetInputBuffers[i]->getType());
    1015         }
    1016     }
    1017     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    1018         auto & rate = mStreamSetOutputs[i].rate;
    1019         std::string refSet = mStreamSetOutputs[i].rate.referenceStreamSet();
    1020         if (refSet.empty()) {
    1021             itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
    1022         }
    1023         else {
    1024             Port port; unsigned ssIdx;
    1025             std::tie(port, ssIdx) = getStreamPort(mStreamSetOutputs[i].name);
    1026             if (port == Port::Output) ssIdx += mStreamSetInputs.size();
    1027             itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
    1028         }
    1029         unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
    1030         if (blocks > 1) {
    1031             tempBuffers.push_back(ArrayType::get(mStreamSetOutputBuffers[i]->getType(), blocks));
    1032         }
    1033         else {
    1034             tempBuffers.push_back(mStreamSetOutputBuffers[i]->getType());
    1035         }
    1036     }
    1037     Type * tempParameterStructType = StructType::create(iBuilder->getContext(), tempBuffers);
    1038     Value * tempParameterArea = iBuilder->CreateCacheAlignedAlloca(tempParameterStructType);
    1039    
    1040     ConstantInt * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
    1041     Value * availablePos = mAvailableItemCount[0];
    1042     Value * itemsAvail = availablePos;
    1043     //  Make sure that corresponding data is available depending on processing rate
    1044     //  for all input stream sets.
    1045     for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
    1046         Value * a = mAvailableItemCount[i];
    1047         auto & rate = mStreamSetInputs[i].rate;
    1048         assert (((rate.referenceStreamSet() == "") || (rate.referenceStreamSet() == mStreamSetInputs[0].name)) && "Multiblock kernel input rate not with respect to principal stream.");
    1049         Value * maxItems = rate.CreateMaxReferenceItemsCalculation(iBuilder, a);
    1050         itemsAvail = iBuilder->CreateSelect(iBuilder->CreateICmpULT(itemsAvail, maxItems), itemsAvail, maxItems);
    1051     }
    1052    
    1053     Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
    1054     Value * itemsToDo = iBuilder->CreateSub(itemsAvail, processed);
    1055     Value * fullBlocksToDo = iBuilder->CreateUDiv(itemsToDo, blockSize);
    1056     Value * excessItems = iBuilder->CreateURem(itemsToDo, blockSize);
    1057    
    1058     //  Now we iteratively process these blocks using the doMultiBlock method. 
    1059     //  In each iteration, we process the maximum number of linearly accessible
    1060     //  blocks on the principal input, reduced to ensure that the corresponding
    1061     //  data is linearly available at the specified processing rates for the other inputs,
    1062     //  and that each of the output buffers has sufficient linearly available space
    1063     //  (using overflow areas, if necessary) for the maximum output that can be
    1064     //  produced.
    1065    
    1066     //iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(fullBlocksToDo, iBuilder->getSize(0)), doSegmentOuterLoop, finalBlockCheck);
    1067     iBuilder->CreateBr(doSegmentOuterLoop);
    1068    
    1069     iBuilder->SetInsertPoint(doSegmentOuterLoop);
    1070     PHINode * const blocksRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "blocksRemaining");
    1071     blocksRemaining->addIncoming(fullBlocksToDo, entry);
    1072    
    1073    
    1074     // For each input buffer, determine the processedItemCount, the block pointer for the
    1075     // buffer block containing the next item, and the number of linearly available items.
    1076     //
    1077     std::vector<Value *> processedItemCount;
    1078     std::vector<Value *> inputBlockPtr;
    1079     std::vector<Value *> producedItemCount;
    1080     std::vector<Value *> outputBlockPtr;
    1081    
    1082     //  Calculate linearly available blocks for all input stream sets.
    1083     Value * linearlyAvailBlocks = nullptr;
    1084     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    1085         Value * p = getProcessedItemCount(mStreamSetInputs[i].name);
    1086         Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
    1087         Value * b = getInputStreamBlockPtr(mStreamSetInputs[i].name, iBuilder->getInt32(0));
    1088         processedItemCount.push_back(p);
    1089         inputBlockPtr.push_back(b);
    1090         auto & rate = mStreamSetInputs[i].rate;
    1091         Value * blocks = nullptr;
    1092         if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator()) && (rate.referenceStreamSet() == "")) {
    1093             blocks = mStreamSetInputBuffers[i]->getLinearlyAccessibleBlocks(iBuilder, blkNo);
    1094         }
    1095         else {
    1096             Value * linearlyAvailItems = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(iBuilder, p);
    1097             Value * items = rate.CreateMaxReferenceItemsCalculation(iBuilder, linearlyAvailItems);
    1098             blocks = iBuilder->CreateUDiv(items, blockSize);
    1099         }
    1100         if (i == 0) {
    1101             linearlyAvailBlocks = blocks;
    1102         }
    1103         else {
    1104             linearlyAvailBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
    1105         }
    1106     }
    1107    
    1108     //  Now determine the linearly writeable blocks, based on available blocks reduced
    1109     //  by limitations of output buffer space.
    1110     Value * linearlyWritableBlocks = linearlyAvailBlocks;
    1111    
    1112     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    1113         Value * p = getProducedItemCount(mStreamSetOutputs[i].name);
    1114         Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
    1115         Value * b = getOutputStreamBlockPtr(mStreamSetOutputs[i].name, iBuilder->getInt32(0));
    1116         producedItemCount.push_back(p);
    1117         outputBlockPtr.push_back(b);
    1118         auto & rate = mStreamSetOutputs[i].rate;
    1119         Value * blocks = nullptr;
    1120         if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator())) {
    1121             blocks = mStreamSetOutputBuffers[0]->getLinearlyWritableBlocks(iBuilder, blkNo);
    1122         }
    1123         else {
    1124             Value * writableItems = mStreamSetOutputBuffers[0]->getLinearlyWritableItems(iBuilder, p);
    1125             blocks = iBuilder->CreateUDiv(writableItems, blockSize);
    1126         }
    1127         linearlyWritableBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyWritableBlocks), blocks, linearlyWritableBlocks);
    1128     }
    1129     Value * haveBlocks = iBuilder->CreateICmpUGT(linearlyWritableBlocks, iBuilder->getSize(0));
    1130    
    1131     iBuilder->CreateCondBr(haveBlocks, doMultiBlockCall, doTempBufferBlock);
    1132    
    1133     //  At this point we have verified the availability of one or more blocks of input data and output buffer space for all stream sets.
    1134     //  Now prepare the doMultiBlock call.
    1135     iBuilder->SetInsertPoint(doMultiBlockCall);
    1136    
    1137     Value * linearlyAvailItems = iBuilder->CreateMul(linearlyWritableBlocks, blockSize);
    1138    
    1139     std::vector<Value *> doMultiBlockArgs;
    1140     doMultiBlockArgs.push_back(linearlyAvailItems);
    1141     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    1142         doMultiBlockArgs.push_back(getRawInputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]));
    1143     }
    1144     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    1145         doMultiBlockArgs.push_back(getRawOutputPointer(mStreamSetOutputs[i].name, iBuilder->getInt32(0), producedItemCount[i]));
    1146     }
    1147        
    1148     iBuilder->CreateCall(multiBlockFunction, doMultiBlockArgs);
    1149    
    1150     // Do copybacks if necessary.
    1151     unsigned priorIdx = 0;
    1152     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    1153         Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
    1154         if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    1155             BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    1156             BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    1157             Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
    1158             Value * priorBlock = iBuilder->CreateLShr(producedItemCount[i], log2BlockSize);
    1159             Value * priorOffset = iBuilder->CreateAnd(producedItemCount[i], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
    1160             Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    1161             Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(iBuilder, priorBlock);
    1162             Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
    1163             Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
    1164             iBuilder->CreateCondBr(wraparound, copyBack, done);
    1165             iBuilder->SetInsertPoint(copyBack);
    1166             Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
    1167             cb->createCopyBack(iBuilder, instance, copyItems);
    1168             iBuilder->CreateBr(done);
    1169             iBuilder->SetInsertPoint(done);
    1170             priorIdx++;
    1171         }
    1172835        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    1173             BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    1174             BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    1175             Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    1176             Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
     836            BasicBlock * copyBack = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     837            BasicBlock * done = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     838            Value * instance = iBuilder->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     839            Value * newlyProduced = iBuilder->CreateSub(iBuilder->getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
    1177840            Value * accessible = cb->getLinearlyAccessibleItems(iBuilder, producedItemCount[i]);
    1178841            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
     
    1186849        }
    1187850    }
    1188     setProcessedItemCount(mStreamSetInputs[0].name, iBuilder->CreateAdd(processed, linearlyAvailItems));
     851    iBuilder->setProcessedItemCount(mStreamSetInputs[0].name, iBuilder->CreateAdd(processed, linearlyAvailItems));
    1189852    Value * reducedBlocksToDo = iBuilder->CreateSub(blocksRemaining, linearlyWritableBlocks);
    1190853    Value * fullBlocksRemain = iBuilder->CreateICmpUGT(reducedBlocksToDo, iBuilder->getSize(0));
     
    1192855    blocksRemaining->addIncoming(reducedBlocksToDo, multiBlockFinal);
    1193856    iBuilder->CreateCondBr(fullBlocksRemain, doSegmentOuterLoop, finalBlockCheck);
    1194    
     857
    1195858    // All the full blocks of input have been processed.  If mIsFinal is true,
    1196859    // we should process the remaining partial block (i.e., excessItems as determined at entry).
    1197860    iBuilder->SetInsertPoint(finalBlockCheck);
    1198861    iBuilder->CreateCondBr(mIsFinal, doTempBufferBlock, segmentDone);
    1199    
    1200     // 
     862
     863    //
    1201864    // We use temporary buffers in 3 different cases that preclude full block processing.
    1202865    // (a) One or more input buffers does not have a sufficient number of input items linearly available.
     
    1210873    tempBlockItems->addIncoming(blockSize, doSegmentOuterLoop);
    1211874    tempBlockItems->addIncoming(excessItems, finalBlockCheck);
    1212    
     875
    1213876    // Will this be the final block processing?
    1214877    Value * doFinal = iBuilder->CreateICmpULT(tempBlockItems, blockSize);
    1215    
     878
    1216879    // Begin constructing the doMultiBlock args.
    1217880    std::vector<Value *> tempArgs;
    1218881    tempArgs.push_back(tempBlockItems);
    1219    
     882
    1220883    // Prepare the temporary buffer area.
    1221884    //
     
    1223886    Constant * const tempAreaSize = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(tempParameterStructType), iBuilder->getSizeTy(), false);
    1224887    iBuilder->CreateMemZero(tempParameterArea, tempAreaSize);
    1225    
     888
    1226889    // For each input and output buffer, copy over necessary data starting from the last
    1227890    // block boundary.
     
    1232895        Value * tempBufPtr = iBuilder->CreateGEP(tempParameterArea, iBuilder->getInt32(i));
    1233896        tempBufPtr = iBuilder->CreatePointerCast(tempBufPtr, mStreamSetInputBuffers[i]->getPointerType());
    1234        
     897
    1235898        auto & rate = mStreamSetInputs[i].rate;
    1236899        Value * blockItemPos = iBuilder->CreateAnd(processedItemCount[i], blockBaseMask);
    1237        
     900
    1238901        // The number of items to copy is determined by the processing rate requirements.
    1239902        if (i > 1) {
     
    1255918        mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, tempBufPtr, inputBlockPtr[i], copyItems1);
    1256919        Value * nextBufPtr = iBuilder->CreateGEP(tempBufPtr, iBuilder->CreateUDiv(availFromBase, blockSize));
    1257         mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, nextBufPtr, getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
    1258         Value * itemAddress = iBuilder->CreatePtrToInt(getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]), iBuilder->getSizeTy());
     920        mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, nextBufPtr, iBuilder->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
     921        Value * itemAddress = iBuilder->CreatePtrToInt(iBuilder->getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]), iBuilder->getSizeTy());
    1259922        Value * baseAddress = iBuilder->CreatePtrToInt(inputBlockPtr[i], iBuilder->getSizeTy());
    1260923        Value * tempAddress = iBuilder->CreateAdd(iBuilder->CreatePtrToInt(tempBufPtr, iBuilder->getSizeTy()), iBuilder->CreateSub(itemAddress, baseAddress));
     
    1268931        blockItemPos.push_back(iBuilder->CreateAnd(producedItemCount[i], blockBaseMask));
    1269932        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, tempBufPtr, outputBlockPtr[i], iBuilder->CreateSub(producedItemCount[i], blockItemPos[i]));
    1270         Value * itemAddress = iBuilder->CreatePtrToInt(getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), producedItemCount[i]), iBuilder->getSizeTy());
     933        Value * itemAddress = iBuilder->CreatePtrToInt(iBuilder->getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), producedItemCount[i]), iBuilder->getSizeTy());
    1271934        Value * baseAddress = iBuilder->CreatePtrToInt(outputBlockPtr[i], iBuilder->getSizeTy());
    1272935        Value * tempAddress = iBuilder->CreateAdd(iBuilder->CreatePtrToInt(tempBufPtr, iBuilder->getSizeTy()), iBuilder->CreateSub(itemAddress, baseAddress));
     
    1277940
    1278941    // Copy back data to the actual output buffers.
    1279    
     942
    1280943    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    1281944        Value * tempBufPtr = iBuilder->CreateGEP(tempParameterArea, iBuilder->getInt32(mStreamSetInputs.size() + i));
    1282945        tempBufPtr = iBuilder->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
    1283         Value * final_items = getProducedItemCount(mStreamSetOutputs[i].name);
     946        Value * final_items = iBuilder->getProducedItemCount(mStreamSetOutputs[i].name);
    1284947        Value * copyItems = iBuilder->CreateSub(final_items, blockItemPos[i]);
    1285948        Value * copyItems1 = mStreamSetOutputBuffers[i]->getLinearlyWritableItems(iBuilder, blockItemPos[i]); // must be a whole number of blocks.
     
    1287950        Value * copyItems2 = iBuilder->CreateSelect(iBuilder->CreateICmpULT(copyItems, copyItems), iBuilder->getSize(0), iBuilder->CreateSub(copyItems, copyItems1));
    1288951        tempBufPtr = iBuilder->CreateGEP(tempBufPtr, iBuilder->CreateUDiv(copyItems1, blockSize));
    1289         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, getStreamSetBufferPtr(mStreamSetOutputs[i].name), tempBufPtr, copyItems2);
    1290     }
    1291 
    1292     setProcessedItemCount(mStreamSetInputs[0].name, finalItemPos[0]);
     952        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, iBuilder->getStreamSetBufferPtr(mStreamSetOutputs[i].name), tempBufPtr, copyItems2);
     953    }
     954
     955    iBuilder->setProcessedItemCount(mStreamSetInputs[0].name, finalItemPos[0]);
    1293956
    1294957    //  We've dealt with the partial block processing and copied information back into the
     
    1298961    iBuilder->SetInsertPoint(segmentDone);
    1299962}
    1300                                                            
     963
     // Emits a call to this kernel's terminate function (obtained via getTerminateFunction for
     // the builder's current module), passing the kernel instance as the only argument, and
     // stores the resulting call value in mOutputScalarResult.
     // The assert checks that `idb` is non-null before it is dereferenced.
     964void Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & idb) {
     965    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
     966    mOutputScalarResult = idb->CreateCall(getTerminateFunction(idb->getModule()), { getInstance() });
     967}
     968
     // Looks up `name` in mStreamMap and returns the StreamPort stored for it.
     // A name that is not present is reported through report_fatal_error, with a message
     // that includes this kernel's name and the requested stream set name.
     969Kernel::StreamPort Kernel::getStreamPort(const std::string & name) const {
     970    const auto f = mStreamMap.find(name);
     971    if (LLVM_UNLIKELY(f == mStreamMap.end())) {
     972        report_fatal_error(getName() + " does not contain stream set " + name);
     973    }
     974    return f->second;
     975}
     976
    1301977// CONSTRUCTOR
    1302978Kernel::Kernel(std::string && kernelName,
     
    13451021}
    13461022
    1347 // CONSTRUCTOR
    1348 MultiBlockKernel::MultiBlockKernel(std::string && kernelName,
    1349                                    std::vector<Binding> && stream_inputs,
    1350                                    std::vector<Binding> && stream_outputs,
    1351                                    std::vector<Binding> && scalar_parameters,
    1352                                    std::vector<Binding> && scalar_outputs,
    1353                                    std::vector<Binding> && internal_scalars)
    1354 : Kernel(std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
    1355    
    1356 }
    1357 }
     1023}
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5439 r5440  
    2525
    2626class Kernel : public KernelInterface {
     27    friend class KernelBuilder;
    2728protected:
    2829    using KernelMap = boost::container::flat_map<std::string, unsigned>;
     
    3233    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
    3334    using Kernels = std::vector<Kernel *>;
    34 
    35     friend class KernelBuilder;
    36     friend void ::generateSegmentParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> &, const Kernels &);
    37     friend void ::generatePipelineLoop(const std::unique_ptr<kernel::KernelBuilder> &, const Kernels &);
    38     friend void ::generateParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> &, const Kernels &);
    3935
    4036    static const std::string DO_BLOCK_SUFFIX;
     
    7975    bool isCachable() const override { return false; }
    8076
    81     std::string makeSignature() override;
     77    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
    8278
    8379    // Can the module ID itself serve as the unique signature?
     
    8783    //
    8884
    89     void createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    90 
    91     void createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, llvm::Module * const kernelModule);
     85    void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
     86
     87    void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, llvm::Module * const kernelModule);
    9288
    9389    llvm::Module * getModule() const {
     
    9591    }
    9692
    97     // Generate the Kernel to the current module (iBuilder->getModule()).
    98     void generateKernel();
     93    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
    9994   
    100     llvm::Value * createInstance() final;
    101 
    102     void initializeInstance() final;
    103 
    104     void finalizeInstance() final;
    105 
    106     llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const final;
    107 
    108     void setProducedItemCount(const std::string & name, llvm::Value * value) const final;
    109 
    110     llvm::Value * getProcessedItemCount(const std::string & name) const final;
    111 
    112     void setProcessedItemCount(const std::string & name, llvm::Value * value) const final;
    113 
    114     llvm::Value * getConsumedItemCount(const std::string & name) const final;
    115 
    116     void setConsumedItemCount(const std::string & name, llvm::Value * value) const final;
    117 
    118     llvm::Value * getTerminationSignal() const final;
    119 
    120     void setTerminationSignal() const final;
    121 
    122     // Get the value of a scalar field for the current instance.
    123     llvm::Value * getScalarFieldPtr(llvm::Value * index) const;
    124 
    125     llvm::Value * getScalarFieldPtr(const std::string & fieldName) const;
    126 
    127     llvm::Value * getScalarField(const std::string & fieldName) const;
    128 
    129     // Set the value of a scalar field for the current instance.
    130     void setScalarField(const std::string & fieldName, llvm::Value * value) const;
    131 
    132     // Synchronization actions for executing a kernel for a particular logical segment.
    133     //
    134     // Before the segment is processed, acquireLogicalSegmentNo must be used to load
    135     // the segment number of the kernel state to ensure that the previous segment is
    136     // complete (by checking that the acquired segment number is equal to the desired segment
    137     // number).
    138     // After all segment processing actions for the kernel are complete, and any necessary
    139     // data has been extracted from the kernel for further pipeline processing, the
    140     // segment number must be incremented and stored using releaseLogicalSegmentNo.
    141     llvm::LoadInst * acquireLogicalSegmentNo() const;
    142 
    143     void releaseLogicalSegmentNo(llvm::Value * nextSegNo) const;
     95    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
     96
     97    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
     98
     99    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
    144100
    145101    bool hasNoTerminateAttribute() const {
     
    162118        return mStreamSetOutputBuffers[i];
    163119    }
    164 
    165     llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
    166 
    167     llvm::Value * getAccumulator(const std::string & accumName) const;
    168120
    169121    virtual ~Kernel() = 0;
     
    194146    }
    195147
     148    unsigned getScalarIndex(const std::string & name) const;
     149
    196150    void prepareStreamSetNameMap();
    197151
    198     void linkExternalMethods() override { }
    199 
    200     virtual void prepareKernel();
    201 
    202     virtual void generateInitializeMethod() { }
     152    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
     153
     154    virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
     155
     156    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
    203157   
    204     virtual void generateDoSegmentMethod() = 0;
    205 
    206     virtual void generateFinalizeMethod() { }
     158    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
     159
     160    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
    207161
    208162    // Add an additional scalar field to the KernelState struct.
     
    212166    unsigned addUnnamedScalar(llvm::Type * type);
    213167
    214     // Run-time access of Kernel State and parameters of methods for
    215     // use in implementing kernels.
    216    
    217     // Get the index of a named scalar field within the kernel state struct.
    218     unsigned getScalarIndex(const std::string & name) const;
    219 
    220     llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) const;
    221 
    222     llvm::Value * loadInputStreamBlock(const std::string & name, llvm::Value * streamIndex) const;
    223    
    224     llvm::Value * getInputStreamPackPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
    225    
    226     llvm::Value * loadInputStreamPack(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
    227    
    228     llvm::Value * getInputStreamSetCount(const std::string & name) const;
    229 
    230     llvm::Value * getOutputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) const;
    231    
    232     void storeOutputStreamBlock(const std::string & name, llvm::Value * streamIndex, llvm::Value * toStore) const;
    233    
    234     llvm::Value * getOutputStreamPackPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
    235    
    236     void storeOutputStreamPack(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex, llvm::Value * toStore) const;
    237 
    238     llvm::Value * getOutputStreamSetCount(const std::string & name) const;
    239 
    240     llvm::Value * getAdjustedInputStreamBlockPtr(llvm::Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const;
    241 
    242     llvm::Value * getRawInputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    243 
    244     llvm::Value * getRawOutputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    245 
    246     llvm::Value * getBaseAddress(const std::string & name) const;
    247 
    248     void setBaseAddress(const std::string & name, llvm::Value * addr) const;
    249 
    250     llvm::Value * getBufferedSize(const std::string & name) const;
    251 
    252     void setBufferedSize(const std::string & name, llvm::Value * size) const;
    253 
    254     void reserveBytes(const std::string & name, llvm::Value * requested) const;
    255 
    256     llvm::Value * getAvailableItemCount(const std::string & name) const;
    257 
    258     llvm::Value * getLinearlyAccessibleItems(const std::string & name, llvm::Value * fromPosition) const;
    259 
    260     llvm::BasicBlock * CreateWaitForConsumers() const;
    261 
    262     llvm::BasicBlock * CreateBasicBlock(std::string && name) const;
    263 
    264     llvm::Value * getStreamSetBufferPtr(const std::string & name) const;
    265 
    266168    llvm::Value * getIsFinal() const {
    267169        return mIsFinal;
    268170    }
    269171
    270     void callGenerateInitializeMethod();
    271 
    272     void callGenerateDoSegmentMethod();
    273 
    274     void callGenerateFinalizeMethod();
     172    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
     173
     174    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
     175
     176    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
    275177
    276178    StreamPort getStreamPort(const std::string & name) const;
     
    304206private:
    305207
    306     llvm::Value * getConsumerLock(const std::string & name) const;
    307 
    308     void setConsumerLock(const std::string & name, llvm::Value * value) const;
    309 
    310     llvm::Value * computeBlockIndex(const std::vector<Binding> & binding, const std::string & name, llvm::Value * itemCount) const;
     // Returns the i-th entry of mAvailableItemCount. No bounds check is performed here.
     // NOTE(review): presumably `i` is the index of an input stream set — confirm against callers.
     208    llvm::Value * getAvailableItemCount(const unsigned i) const {
     209        return mAvailableItemCount[i];
     210    }
    311211
    312212protected:
     
    344244protected:
    345245
    346     void CreateDoBlockMethodCall();
     246    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
    347247
    348248    // Each kernel builder subtype must provide its own logic for generating
    349249    // doBlock calls.
    350     virtual void generateDoBlockMethod() = 0;
     250    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
    351251
    352252    // Each kernel builder subtype must also specify the logic for processing the
     
    357257    // not be overridden.
    358258
    359     virtual void generateFinalBlockMethod(llvm::Value * remainingItems);
    360 
    361     void generateDoSegmentMethod() override final;
     259    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
     260
     261    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
    362262
    363263    BlockOrientedKernel(std::string && kernelName,
     
    370270private:
    371271
    372     virtual bool useIndirectBr() const;
    373 
    374     void writeDoBlockMethod();
    375 
    376     void writeFinalBlockMethod(llvm::Value * remainingItems);
     272    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
     273
     274    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
    377275
    378276private:
     
    384282};
    385283
    386 /*   
     284/*
    387285The Multi-Block Kernel Builder
    388286------------------------------
     
    391289efficient kernels with possibly variable and/or nonaligned output, subject to
    392290exact or MaxRatio processing constraints.   The following restrictions apply.
    393    
     291
    394292#.  The input consists of one or more stream sets, the first of which is
    395     known as the principal input stream set. 
    396    
     293    known as the principal input stream set.
     294
    397295#.  If there is more than one input stream set, the additional stream sets must
    398296    have a processing rate defined with respect to the input stream set of one
     
    400298    declared without a processing rate attribute have the FixedRate(1) attribute
    401299    by default and therefore satisfy this constraint.
    402    
     300
    403301#.  All output stream sets must be declared with processing rate attributes
    404302    of one of the following types:
    405303    *  FixedRate, Add1, Roundup, or MaxRatio with respect to the principal input stream set.
    406304    *  FixedRate with respect to some other output stream set.
    407    
     305
    408306    When using the Multi-Block Kernel Builder to program a new type of kernel,
    409307    the programmer must implement the generateDoMultiBlockMethod for normal
    410308    multi-block processing according to the requirements below, as well as
    411309    providing for special final block processing, if necessary.
    412            
     310
    413311#.  The doMultiBlockMethod will be called with the following parameters:
    414312    * the number of items of the principal input stream to process (itemsToDo),
     
    438336    * for any input pointer p, a GEP instruction with a single int32 index i
    439337      will produce a pointer to the buffer position corresponding to the ith block of the
    440       principal input stream set. 
     338      principal input stream set.
    441339    * for any output stream set declared with a Fixed or Add1 processing rate with respect
    442340      to the principal input stream set, a GEP instruction with a single int32 index i
    443341      will produce a pointer to the buffer position corresponding to the ith block of the
    444342      principal input stream set.
    445                    
     343
    446344#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
    447345    processed and produced item counts are updated for all stream sets that have exact
    448346    processing rate attributes.   Programmers are responsible for updating the producedItemCount
    449347    of any stream set declared with a variable attribute (MaxRatio).
    450                            
     348
    451349#.  An important caveat is that buffer areas may change arbitrarily between
    452350    calls to the doMultiBlockMethod.   In no case should a kernel store a
     
    467365
    468366    // Each multi-block kernel subtype must provide its own logic for handling
    469     // doMultiBlock calls, subject to the requirements laid out above. 
     367    // doMultiBlock calls, subject to the requirements laid out above.
    470368    // The generateMultiBlockLogic must be written to generate this logic, given
    471369    // a created but empty function.  Upon entry to generateMultiBlockLogic,
    472370    // the builder insertion point will be set to the entry block; upon
    473371    // exit the RetVoid instruction will be added to complete the method.
    474     //
    475     virtual void generateMultiBlockLogic () = 0;
     372    //
     373    virtual void generateMultiBlockLogic() = 0;
     374
     375private:
    476376
    477377    // Given a kernel subtype with an appropriate interface, the generateDoSegment
    478378    // method of the multi-block kernel builder makes all the necessary arrangements
    479379    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
    480     void generateDoSegmentMethod() override final;
     380    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
     381
    481382};
    482    
    483    
     383
     384
    484385}
    485386#endif
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5439 r5440  
    1010namespace kernel {
    1111
    12 Value * KernelBuilder::getScalarFieldPtr(Value * const index) {
    13     return CreateGEP(mKernel->getInstance(), {getInt32(0), index});
    14 }
    15 
    16 Value * KernelBuilder::getScalarFieldPtr(const std::string & fieldName) {
    17     return getScalarFieldPtr(getInt32(mKernel->getScalarIndex(fieldName)));
     // Emits a GEP on `instance` with the index list {i32 0, index} and returns the resulting pointer.
     // NOTE(review): presumably `instance` points to a kernel state struct and `index` selects one of
     // its fields — confirm against callers.
     12Value * KernelBuilder::getScalarFieldPtr(llvm::Value * instance, Value * const index) {
     13    return CreateGEP(instance, {getInt32(0), index});
     14}
     15
     // Name-based overload: resolves `fieldName` to an index with mKernel->getScalarIndex, wraps it
     // as an i32 constant, and forwards to the (instance, index) overload.
     16Value * KernelBuilder::getScalarFieldPtr(llvm::Value * instance, const std::string & fieldName) {
     17    return getScalarFieldPtr(instance, getInt32(mKernel->getScalarIndex(fieldName)));
     18}
     19
     // Convenience overload: same as the (instance, index) overload, using mKernel->getInstance()
     // as the instance.
     20llvm::Value * KernelBuilder::getScalarFieldPtr(llvm::Value * index) {
     21    return getScalarFieldPtr(mKernel->getInstance(), index);
     22}
     23
     // Convenience overload: same as the (instance, fieldName) overload, using
     // mKernel->getInstance() as the instance.
     24llvm::Value *KernelBuilder:: getScalarFieldPtr(const std::string & fieldName) {
     25    return getScalarFieldPtr(mKernel->getInstance(), fieldName);
    1826}
    1927
     
    4755        std::string principalField;
    4856        if (refSet.empty()) {
    49             const auto & principleSet = mKernel->getStreamOutput(0).name;
    5057            if (mKernel->getStreamInputs().empty()) {
    51                 principalField = principleSet + Kernel::PRODUCED_ITEM_COUNT_SUFFIX;
     58                principalField = mKernel->getStreamOutput(0).name + Kernel::PRODUCED_ITEM_COUNT_SUFFIX;
    5259            } else {
    53                 principalField = principleSet + Kernel::PROCESSED_ITEM_COUNT_SUFFIX;
     60                principalField = mKernel->getStreamInput(0).name + Kernel::PROCESSED_ITEM_COUNT_SUFFIX;
    5461            }
    5562        } else {
     
    8491
    // Returns the available item count of the input stream set called `name`.
    // The inputs returned by mKernel->getStreamInputs() are scanned in order; on the first name
    // match the value of mKernel->getAvailableItemCount(i) is returned. If no input has that
    // name the function returns nullptr, so callers must be prepared for a null result.
    8592Value * KernelBuilder::getAvailableItemCount(const std::string & name) {
    86 //    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    87 //        if (mStreamSetInputs[i].name == name) {
    88 //            return mAvailableItemCount[i];
    89 //        }
    90 //    }
     93    const auto & inputs = mKernel->getStreamInputs();
     94    for (unsigned i = 0; i < inputs.size(); ++i) {
     95        if (inputs[i].name == name) {
     96            return mKernel->getAvailableItemCount(i);
     97        }
     98    }
    9199    return nullptr;
    92100}
     
    220228}
    221229
    222 BasicBlock * KernelBuilder::CreateWaitForConsumers() {
     230
     // Emits a call to the kernel's doSegment function, fetched with
     // mKernel->getDoSegmentFunction for the builder's current module, passing `args` unchanged.
     // The assert checks that the number of supplied arguments equals the function's parameter
     // count. Returns the created call instruction.
     231CallInst * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) {
     232    Function * const doSegment = mKernel->getDoSegmentFunction(getModule());
     233    assert (doSegment->getArgumentList().size() == args.size());
     234    return CreateCall(doSegment, args);
     235}
     236
     // Returns the value of the output scalar named `accumName` for the current kernel.
     //
     // The value is read from mKernel->mOutputScalarResult. Three conditions are reported through
     // report_fatal_error instead of returning:
     //   * mOutputScalarResult is still null (the message says the kernel has not terminated yet);
     //   * the kernel declares no scalar outputs;
     //   * no scalar output is named `accumName`.
     //
     // When the kernel declares exactly one scalar output, the stored result is returned as is.
     // Otherwise an extractvalue at the matching output's position is emitted on it.
     // NOTE(review): this assumes the stored result is the bare scalar for a single output and an
     // aggregate with one element per output otherwise — confirm against the terminate function.
     237Value * KernelBuilder::getAccumulator(const std::string & accumName) {
     238    auto results = mKernel->mOutputScalarResult;
     239    if (LLVM_UNLIKELY(results == nullptr)) {
     240        report_fatal_error("Cannot get accumulator " + accumName + " until " + mKernel->getName() + " has terminated.");
     241    }
     242    const auto & outputs = mKernel->getScalarOutputs();
     243    const auto n = outputs.size();
     244    if (LLVM_UNLIKELY(n == 0)) {
     245        report_fatal_error(mKernel->getName() + " has no output scalars.");
     246    } else {
     247        for (unsigned i = 0; i < n; ++i) {
     248            const Binding & b = outputs[i];
     249            if (b.name == accumName) {
     250                if (n == 1) {
     251                    return results;
     252                } else {
     253                    return CreateExtractValue(results, {i});
     254                }
     255            }
     256        }
     257        report_fatal_error(mKernel->getName() + " has no output scalar named " + accumName);
     258    }
     259}
     260
     261BasicBlock * KernelBuilder::CreateConsumerWait() {
    223262    const auto consumers = mKernel->getStreamOutputs();
    224263    BasicBlock * const entry = GetInsertBlock();
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r5436 r5440  
    5454    // use in implementing kernels.
    5555
    56     // Get the index of a named scalar field within the kernel state struct.
    57     llvm::ConstantInt * getScalarIndex(const std::string & name);
    58 
    5956    llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex);
    6057
     
    9592    llvm::Value * getLinearlyAccessibleItems(const std::string & name, llvm::Value * fromPosition);
    9693
    97     llvm::BasicBlock * CreateWaitForConsumers();
     94    llvm::BasicBlock * CreateConsumerWait();
    9895
    9996    llvm::Value * getStreamSetBufferPtr(const std::string & name);
     97
     98    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args);
     99
     100    llvm::Value * getAccumulator(const std::string & accumName);
     101
     102    llvm::Value * getConsumerLock(const std::string & name);
     103
     104    void setConsumerLock(const std::string & name, llvm::Value * value);
    100105
    101106    Kernel * getKernel() const {
     
    114119    }
    115120
     121    llvm::Value * getScalarFieldPtr(llvm::Value * instance, llvm::Value * index);
     122
     123    llvm::Value * getScalarFieldPtr(llvm::Value * instance, const std::string & fieldName);
     124
    116125private:
    117 
    118     llvm::Value * getConsumerLock(const std::string & name);
    119 
    120     void setConsumerLock(const std::string & name, llvm::Value * value);
    121126
    122127    llvm::Value * computeBlockIndex(llvm::Value * itemCount);
  • icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp

    r5436 r5440  
    1111using namespace kernel;
    1212
    13 Value * getInputPtr(IDISA::IDISA_Builder * const iBuilder, Value * blockStartPtr, Value * offset) {
     13Value * getInputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
    1414    return iBuilder->CreateGEP(
    1515            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
     
    1818}
    1919
    20 Value * selectMin(IDISA::IDISA_Builder * const iBuilder, Value * a, Value * b) {
     20Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
    2121    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
    2222}
    2323
    24 void LZ4ByteStreamDecoderKernel::generateDoBlockMethod() {
     24void LZ4ByteStreamDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    2525    BasicBlock * entry_block = iBuilder->GetInsertBlock();
    26     BasicBlock * loopBody = CreateBasicBlock("bytestream_block_loop_body");
    27     BasicBlock * loopExit = CreateBasicBlock("bytestream_block_loop_exit");
     26    BasicBlock * loopBody = iBuilder->CreateBasicBlock("bytestream_block_loop_body");
     27    BasicBlock * loopExit = iBuilder->CreateBasicBlock("bytestream_block_loop_exit");
    2828
    2929    Value * bufferSize = iBuilder->getSize(mBufferSize);
     
    3131    Value * iterations = selectMin(iBuilder,
    3232            iBuilder->getSize(iBuilder->getBitBlockWidth()),
    33             iBuilder->CreateSub(getAvailableItemCount("literalIndexes"), getProcessedItemCount("literalIndexes")));
    34     Value * inputBufferBasePtr = getRawInputPointer("inputStream", iBuilder->getSize(0), iBuilder->getSize(0));
    35     Value * outputBufferBasePtr = getRawOutputPointer("outputStream", iBuilder->getSize(0), iBuilder->getSize(0));
     33            iBuilder->CreateSub(iBuilder->getAvailableItemCount("literalIndexes"), iBuilder->getProcessedItemCount("literalIndexes")));
     34    Value * inputBufferBasePtr = iBuilder->getRawInputPointer("inputStream", iBuilder->getSize(0), iBuilder->getSize(0));
     35    Value * outputBufferBasePtr = iBuilder->getRawOutputPointer("outputStream", iBuilder->getSize(0), iBuilder->getSize(0));
    3636    iBuilder->CreateBr(loopBody);
    3737
     
    4343    // Indexes extraction.
    4444    Value * literalStartPtr = getInputPtr(iBuilder,
    45             getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(0)), phiInputIndex);
     45            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(0)), phiInputIndex);
    4646    Value * literalLengthPtr = getInputPtr(iBuilder,
    47             getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(1)), phiInputIndex);
     47            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(1)), phiInputIndex);
    4848    Value * matchOffsetPtr = getInputPtr(iBuilder,
    49             getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(0)), phiInputIndex);
     49            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(0)), phiInputIndex);
    5050    Value * matchLengthPtr = getInputPtr(iBuilder,
    51             getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(1)), phiInputIndex);
     51            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(1)), phiInputIndex);
    5252    Value * literalStart = iBuilder->CreateZExt(iBuilder->CreateLoad(literalStartPtr), iBuilder->getSizeTy());
    5353    Value * literalLength = iBuilder->CreateZExt(iBuilder->CreateLoad(literalLengthPtr), iBuilder->getSizeTy());
     
    5656
    5757#if 0
    58     Value * processedItem = iBuilder->CreateAdd(getProcessedItemCount("literalIndexes"), phiInputIndex);
     58    Value * processedItem = iBuilder->CreateAdd(iBuilder->getProcessedItemCount("literalIndexes"), phiInputIndex);
    5959    iBuilder->CallPrintInt("ProccessedItem", processedItem);
    6060    iBuilder->CallPrintInt("LiteralStart", literalStart);
     
    6666    // =================================================
    6767    // Literals.
    68     Value * outputItems = getProducedItemCount("outputStream");
     68    Value * outputItems = iBuilder->getProducedItemCount("outputStream");
    6969    Value * bufferOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
    7070    Value * remainingBuffer = iBuilder->CreateSub(bufferSize, bufferOffset);
     
    9494            iBuilder->getSize(4)
    9595            );
    96     BasicBlock * cpyLoopCond = CreateBasicBlock("matchcopy_loop_cond");
    97     BasicBlock * cpyLoopBody = CreateBasicBlock("matchcopy_loop_body");
    98     BasicBlock * cpyLoopExit = CreateBasicBlock("matchcopy_loop_exit");
     96    BasicBlock * cpyLoopCond = iBuilder->CreateBasicBlock("matchcopy_loop_cond");
     97    BasicBlock * cpyLoopBody = iBuilder->CreateBasicBlock("matchcopy_loop_body");
     98    BasicBlock * cpyLoopExit = iBuilder->CreateBasicBlock("matchcopy_loop_exit");
    9999    iBuilder->CreateBr(cpyLoopCond);
    100100
     
    117117    iBuilder->CallPrintIntToStderr("dstOffset", phiDstOffset);
    118118#endif
    119     BasicBlock * reachingBufferEnd_then = CreateBasicBlock("matchcopy_reaching_buf_end_then");
    120     BasicBlock * reachingBufferEnd_else = CreateBasicBlock("matchcopy_reaching_buf_end_else");
     119    BasicBlock * reachingBufferEnd_then = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_then");
     120    BasicBlock * reachingBufferEnd_else = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_else");
    121121    Value * distSrcEnd = iBuilder->CreateSub(bufferSize, phiSrcOffset);
    122122    Value * distDstEnd = iBuilder->CreateSub(bufferSize, phiDstOffset);
     
    170170    iBuilder->SetInsertPoint(cpyLoopExit);
    171171    outputItems = iBuilder->CreateAdd(outputItems, matchLength);
    172     setProducedItemCount("outputStream", outputItems);
     172    iBuilder->setProducedItemCount("outputStream", outputItems);
    173173
    174174    Value * newInputIndex = iBuilder->CreateAdd(phiInputIndex, iBuilder->getSize(1));
     
    182182    iBuilder->SetInsertPoint(loopExit);
    183183#ifndef NDEBUG
    184     iBuilder->CallPrintInt("Decompressed bytes", getProducedItemCount("outputStream"));
     184    iBuilder->CallPrintInt("Decompressed bytes", iBuilder->getProducedItemCount("outputStream"));
    185185#endif
    186186}
  • icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.h

    r5436 r5440  
    1818    LZ4ByteStreamDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, size_t bufferSize);
    1919protected:
    20     void generateDoBlockMethod() override;
     20    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2121private:
    2222    size_t mBufferSize;
  • icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.cpp

    r5436 r5440  
    2828namespace {
    2929
    30 Value * generateBitswap(IDISA::IDISA_Builder * const iBuilder, Value * v) {
     30Value * generateBitswap(const std::unique_ptr<KernelBuilder> & iBuilder, Value * v) {
    3131    Value * bswapFunc = Intrinsic::getDeclaration(iBuilder->getModule(),
    3232            Intrinsic::bswap, v->getType());
     
    3434}
    3535
    36 Value * selectMin(IDISA::IDISA_Builder * const iBuilder, Value * a, Value * b) {
     36Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
    3737    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
    3838}
    3939
    40 Value * createStackVar(IDISA::IDISA_Builder * const iBuilder, Type * type, StringRef name, Value * initializer = nullptr) {
     40Value * createStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Type * type, StringRef name, Value * initializer = nullptr) {
    4141    Value * var = iBuilder->CreateAlloca(type, nullptr, name);
    4242    if (initializer) {
     
    4848}
    4949
    50 void incStackVar(IDISA::IDISA_Builder * const iBuilder, Value * svar, Value * increment = nullptr) {
     50void incStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Value * svar, Value * increment = nullptr) {
    5151    Value * value = iBuilder->CreateLoad(svar);
    5252    if (increment) {
     
    5858}
    5959
    60 Value * getOutputPtr(IDISA::IDISA_Builder * const iBuilder, Value * blockStartPtr, Value * offset) {
     60Value * getOutputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
    6161    return iBuilder->CreateGEP(
    6262            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
     
    6767}       // anonymouse namespace
    6868
    69 
    70 /**
    71  * In order to allow mem2reg to promote the stack variables, alloca's have
    72  * to be in the entry block of a function. Thus, we need to disable indirect
    73  * branching on this kernel to have a standalone DoMethod function.
    74  */
    75 bool LZ4IndexDecoderKernel::useIndirectBr() const {
    76     return false;
    77 }
    78 
    79 
    8069/**
    8170 * Get the offset within the current word.
    8271 */
    83 Value * LZ4IndexDecoderKernel::getWordOffset() {
     72Value * LZ4IndexDecoderKernel::getWordOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    8473    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
    8574    return iBuilder->CreateAnd(
     
    9382 * Get the offset of the start of the current word.
    9483 */
    95 Value * LZ4IndexDecoderKernel::getWordStartOffset() {
     84Value * LZ4IndexDecoderKernel::getWordStartOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
    9685    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
    9786    return iBuilder->CreateAnd(
     
    10695 * If offset is not provided, load the current byte by default.
    10796 */
    108 Value * LZ4IndexDecoderKernel::loadRawByte(Value * offset = nullptr) {
     97Value * LZ4IndexDecoderKernel::loadRawByte(const std::unique_ptr<KernelBuilder> & iBuilder, Value * offset) {
    10998    Value * blockStartPtr = iBuilder->CreatePointerCast(
    110             getInputStreamBlockPtr("byteStream", iBuilder->getInt32(0)),
     99            iBuilder->getInputStreamBlockPtr("byteStream", iBuilder->getInt32(0)),
    111100            iBuilder->getInt8PtrTy()
    112101            );
     
    125114 * cleared  = ....111
    126115 */
    127 void LZ4IndexDecoderKernel::setExtenderUntilOffset() {
     116void LZ4IndexDecoderKernel::setExtenderUntilOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
    128117    // Little-endian, offset counts from LSB
    129118    // extender = extender ^ ~((1 << offset) -1)
    130119    Value * extender = iBuilder->CreateLoad(sExtender);
    131120    Value * wordOffset = iBuilder->CreateZExt(
    132             getWordOffset(),
     121            getWordOffset(iBuilder),
    133122            iBuilder->getSizeTy()
    134123            );
     
    146135 * Called when we potentially reach a new word.  Usually followed by setExtenderUntilOffset.
    147136 */
    148 void LZ4IndexDecoderKernel::loadCurrentExtender() {
     137void LZ4IndexDecoderKernel::loadCurrentExtender(const std::unique_ptr<KernelBuilder> & iBuilder) {
    149138    iBuilder->CreateStore(
    150139            iBuilder->CreateExtractElement(extenders,
     
    158147
    159148
    160 void LZ4IndexDecoderKernel::generateProduceOutput() {
    161     Value * producedItem = getProducedItemCount("literalIndexes");
     149void LZ4IndexDecoderKernel::generateProduceOutput(const std::unique_ptr<KernelBuilder> &iBuilder) {
     150    Value * producedItem = iBuilder->getProducedItemCount("literalIndexes");
    162151
    163152#ifndef NDEBUG
     
    165154    // LiteralStart is adjusted to be relative to the block start, so that
    166155    // the output can be compared against that of the reference implementation.
    167     //iBuilder->CallPrintInt("LiteralStart", getScalarField("LiteralStart"));
    168     iBuilder->CallPrintInt("LiteralStart", iBuilder->CreateSub(
    169                 getScalarField("LiteralStart"), getScalarField("LZ4BlockStart")));
    170     iBuilder->CallPrintInt("LiteralLength", getScalarField("LiteralLength"));
    171     iBuilder->CallPrintInt("MatchOffset", getScalarField("MatchOffset"));
    172     iBuilder->CallPrintInt("MatchLength", getScalarField("MatchLength"));
     156    Value * literalStart = iBuilder->CreateSub(iBuilder->getScalarField("LiteralStart"), iBuilder->getScalarField("LZ4BlockStart"));
     157    iBuilder->CallPrintInt("LiteralStart", literalStart);
     158    iBuilder->CallPrintInt("LiteralLength", iBuilder->getScalarField("LiteralLength"));
     159    iBuilder->CallPrintInt("MatchOffset", iBuilder->getScalarField("MatchOffset"));
     160    iBuilder->CallPrintInt("MatchLength", iBuilder->getScalarField("MatchLength"));
    173161#endif
    174162    printRTDebugMsg("--------------");
     
    179167            );  // producedItem % blockWidth (as blockWidth is always a power of 2)
    180168    Value * literalStartPtr = getOutputPtr(iBuilder,
    181             getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(0)), outputOffset);
     169            iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(0)), outputOffset);
    182170    Value * literalLengthPtr = getOutputPtr(iBuilder,
    183             getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(1)), outputOffset);
     171            iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(1)), outputOffset);
    184172    Value * matchOffsetPtr = getOutputPtr(iBuilder,
    185             getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(0)), outputOffset);
     173            iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(0)), outputOffset);
    186174    Value * matchLengthPtr = getOutputPtr(iBuilder,
    187             getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(1)), outputOffset);
    188     iBuilder->CreateStore(getScalarField("LiteralStart"), literalStartPtr);
    189     iBuilder->CreateStore(getScalarField("LiteralLength"), literalLengthPtr);
    190     iBuilder->CreateStore(getScalarField("MatchOffset"), matchOffsetPtr);
    191     iBuilder->CreateStore(getScalarField("MatchLength"), matchLengthPtr);
    192     setProducedItemCount("literalIndexes", iBuilder->CreateAdd(producedItem, iBuilder->getSize(1)));
     175            iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(1)), outputOffset);
     176    iBuilder->CreateStore(iBuilder->getScalarField("LiteralStart"), literalStartPtr);
     177    iBuilder->CreateStore(iBuilder->getScalarField("LiteralLength"), literalLengthPtr);
     178    iBuilder->CreateStore(iBuilder->getScalarField("MatchOffset"), matchOffsetPtr);
     179    iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), matchLengthPtr);
     180    iBuilder->setProducedItemCount("literalIndexes", iBuilder->CreateAdd(producedItem, iBuilder->getSize(1)));
    193181    // matchIndexes has a fixed ratio of 1:1 w.r.t. literalIndexes.
    194182}
    195183
    196184
    197 void LZ4IndexDecoderKernel::generateDoBlockMethod() {
     185void LZ4IndexDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    198186    BasicBlock * entry_block = iBuilder->GetInsertBlock();
    199     BasicBlock * exit_block = CreateBasicBlock("exit");
     187    BasicBlock * exit_block = iBuilder->CreateBasicBlock("exit");
    200188
    201189    // %entry
     
    203191    printRTDebugMsg("entry");
    204192    // Global positions in the byte stream.
    205     Value * blockNo = getScalarField("BlockNo");
     193    Value * blockNo = iBuilder->getScalarField("BlockNo");
    206194    blockStartPos = iBuilder->CreateMul(blockNo, iBuilder->getInt32(iBuilder->getBitBlockWidth()), "blockStartPos");
    207195    extenders = iBuilder->CreateBitCast(
    208             loadInputStreamBlock("extenders", iBuilder->getInt32(0)),
     196            iBuilder->loadInputStreamBlock("extenders", iBuilder->getInt32(0)),
    209197            VectorType::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth() / wordWidth),
    210198            "extenders");
     
    212200    sOffset = createStackVar(iBuilder, iBuilder->getInt32Ty(), "offset");
    213201    // tempLength has different meanings in different states.
    214     sTempLength = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempLength", getScalarField("TempLength"));
    215     sTempCount = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempCount", getScalarField("TempCount"));
    216     sState = createStackVar(iBuilder, iBuilder->getInt8Ty(), "state", getScalarField("State"));
     202    sTempLength = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempLength", iBuilder->getScalarField("TempLength"));
     203    sTempCount = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempCount", iBuilder->getScalarField("TempCount"));
     204    sState = createStackVar(iBuilder, iBuilder->getInt8Ty(), "state", iBuilder->getScalarField("State"));
    217205    sExtender = createStackVar(iBuilder, iBuilder->getSizeTy(), "extender",
    218206            iBuilder->CreateExtractElement(extenders, iBuilder->getInt32(0)));
    219207
    220     BasicBlock * skippingBytes = CreateBasicBlock("skipping_bytes");
    221     BasicBlock * dispatch = CreateBasicBlock("dispatch");
     208    BasicBlock * skippingBytes = iBuilder->CreateBasicBlock("skipping_bytes");
     209    BasicBlock * dispatch = iBuilder->CreateBasicBlock("dispatch");
    222210
    223211    iBuilder->CreateCondBr(
    224             iBuilder->CreateICmpUGT(getScalarField("BytesToSkip"), iBuilder->getInt32(0)),
     212            iBuilder->CreateICmpUGT(iBuilder->getScalarField("BytesToSkip"), iBuilder->getInt32(0)),
    225213            skippingBytes, dispatch
    226214            );
    227215
    228216    // %skipping_bytes
    229     generateSkippingBytes(skippingBytes, exit_block);
     217    generateSkippingBytes(iBuilder, skippingBytes, exit_block);
    230218    // Insert point is at the end of skippingBytes.
    231219    iBuilder->CreateBr(dispatch);
     
    235223
    236224    // %at_block_checksum
    237     BasicBlock * atBlockChecksum = CreateBasicBlock("at_block_checksum");
    238     generateAtBlockChecksum(atBlockChecksum, skippingBytes);
     225    BasicBlock * atBlockChecksum = iBuilder->CreateBasicBlock("at_block_checksum");
     226    generateAtBlockChecksum(iBuilder, atBlockChecksum, skippingBytes);
    239227 
    240228    // %at_block_size
    241     BasicBlock * atBlockSize = CreateBasicBlock("at_block_size");
    242     generateAtBlockSize(atBlockSize, skippingBytes, exit_block);
     229    BasicBlock * atBlockSize = iBuilder->CreateBasicBlock("at_block_size");
     230    generateAtBlockSize(iBuilder, atBlockSize, skippingBytes, exit_block);
    243231
    244232    // %at_token
    245     BasicBlock * atToken = CreateBasicBlock("at_token");
    246     generateAtToken(atToken, exit_block);
     233    BasicBlock * atToken = iBuilder->CreateBasicBlock("at_token");
     234    generateAtToken(iBuilder, atToken, exit_block);
    247235
    248236    // %extending_literal_length
    249     BasicBlock * extendingLiteralLen = CreateBasicBlock("extending_literal_length");
    250     generateExtendingLiteralLen(extendingLiteralLen, exit_block);
     237    BasicBlock * extendingLiteralLen = iBuilder->CreateBasicBlock("extending_literal_length");
     238    generateExtendingLiteralLen(iBuilder, extendingLiteralLen, exit_block);
    251239
    252240    // %at_literals
    253     BasicBlock * atLiterals = CreateBasicBlock("at_literals");
    254     generateAtLiterals(atLiterals);
     241    BasicBlock * atLiterals = iBuilder->CreateBasicBlock("at_literals");
     242    generateAtLiterals(iBuilder, atLiterals);
    255243    iBuilder->CreateBr(skippingBytes);
    256244
     
    259247    // If the whole LZ4 block is done, process the (optional) checksum.
    260248    // Otherwise, go around to process the next sequence.
    261     BasicBlock * atOffset1 = CreateBasicBlock("at_first_offset");
     249    BasicBlock * atOffset1 = iBuilder->CreateBasicBlock("at_first_offset");
    262250    iBuilder->SetInsertPoint(atOffset1);
    263251    Value * nowGlobalPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
    264     BasicBlock * blockEnd_else = CreateBasicBlock("block_end_else");
     252    BasicBlock * blockEnd_else = iBuilder->CreateBasicBlock("block_end_else");
    265253    // Conditional branch inserted at the end of the last block.
    266254    iBuilder->CreateUnlikelyCondBr(
    267             iBuilder->CreateICmpEQ(nowGlobalPos, getScalarField("LZ4BlockEnd")),
     255            iBuilder->CreateICmpEQ(nowGlobalPos, iBuilder->getScalarField("LZ4BlockEnd")),
    268256            atBlockChecksum, blockEnd_else
    269257            );
    270     generateAtFirstOffset(blockEnd_else, exit_block);
     258    generateAtFirstOffset(iBuilder, blockEnd_else, exit_block);
    271259
    272260    // %at_second_offset
    273     BasicBlock * atOffset2 = CreateBasicBlock("at_second_offset");
    274     generateAtSecondOffset(atOffset2, exit_block);
     261    BasicBlock * atOffset2 = iBuilder->CreateBasicBlock("at_second_offset");
     262    generateAtSecondOffset(iBuilder, atOffset2, exit_block);
    275263
    276264    // %extending_match_length
    277     BasicBlock * extendingMatchLen = CreateBasicBlock("extending_match_length");
    278     generateExtendingMatchLen(extendingMatchLen, exit_block);
     265    BasicBlock * extendingMatchLen = iBuilder->CreateBasicBlock("extending_match_length");
     266    generateExtendingMatchLen(iBuilder, extendingMatchLen, exit_block);
    279267    iBuilder->CreateBr(atToken);
    280268
     
    301289    iBuilder->SetInsertPoint(exit_block);
    302290    printRTDebugMsg("exit");
    303     setScalarField("State", iBuilder->CreateLoad(sState));
    304     setScalarField("TempLength", iBuilder->CreateLoad(sTempLength));
    305     setScalarField("TempCount", iBuilder->CreateLoad(sTempCount));
    306     setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getInt32(1)));
     291    iBuilder->setScalarField("State", iBuilder->CreateLoad(sState));
     292    iBuilder->setScalarField("TempLength", iBuilder->CreateLoad(sTempLength));
     293    iBuilder->setScalarField("TempCount", iBuilder->CreateLoad(sTempCount));
     294    iBuilder->setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getInt32(1)));
    307295    // When the kernel builder uses indirectbr, doBlock is not a separate function.
    308296    // Hence, we branch to a new basic block and fall through instead of returning.
    309     BasicBlock * end_block = CreateBasicBlock("end_of_block");
     297    BasicBlock * end_block = iBuilder->CreateBasicBlock("end_of_block");
    310298    iBuilder->CreateBr(end_block);
    311299    iBuilder->SetInsertPoint(end_block);
     
    313301
    314302
    315 void LZ4IndexDecoderKernel::generateBoundaryDetection(State state, BasicBlock * exit_block, bool updateExtenderWord=false) {
     303void LZ4IndexDecoderKernel::generateBoundaryDetection(const std::unique_ptr<KernelBuilder> & iBuilder, State state, BasicBlock * exit_block, bool updateExtenderWord) {
    316304    if (updateExtenderWord) {
    317         BasicBlock * wordBoundary_then = CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
    318         BasicBlock * blockBoundary_else = CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
    319         BasicBlock * wordBoundary_cont = CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
     305        BasicBlock * wordBoundary_then = iBuilder->CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
     306        BasicBlock * blockBoundary_else = iBuilder->CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
     307        BasicBlock * wordBoundary_cont = iBuilder->CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
    320308        iBuilder->CreateUnlikelyCondBr(
    321                 iBuilder->CreateICmpEQ(getWordOffset(), iBuilder->getInt32(0)),
     309                iBuilder->CreateICmpEQ(getWordOffset(iBuilder), iBuilder->getInt32(0)),
    322310                wordBoundary_then, wordBoundary_cont
    323311                );
     
    331319        // Reaching word boundary but not block boundary.  Update the extender word as requested.
    332320        iBuilder->SetInsertPoint(blockBoundary_else);
    333         loadCurrentExtender();
     321        loadCurrentExtender(iBuilder);
    334322        iBuilder->CreateBr(wordBoundary_cont);
    335323
     
    337325        iBuilder->SetInsertPoint(wordBoundary_cont);
    338326    } else {
    339         BasicBlock * blockBoundary_cont = CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
     327        BasicBlock * blockBoundary_cont = iBuilder->CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
    340328        iBuilder->CreateUnlikelyCondBr(
    341329                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
     
    348336
    349337
    350 void LZ4IndexDecoderKernel::generateSkippingBytes(BasicBlock * bb, BasicBlock * exit_block) {
     338void LZ4IndexDecoderKernel::generateSkippingBytes(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    351339    iBuilder->SetInsertPoint(bb);
    352340    printRTDebugMsg("skipping bytes");
     
    355343            iBuilder->getInt32(iBuilder->getBitBlockWidth()), iBuilder->CreateLoad(sOffset)
    356344            );
    357     Value * remainingBytesToSkip = getScalarField("BytesToSkip");
     345    Value * remainingBytesToSkip = iBuilder->getScalarField("BytesToSkip");
    358346    Value * advanceDist = selectMin(iBuilder, remainingBytesInBlock, remainingBytesToSkip);
    359347    remainingBytesToSkip = iBuilder->CreateSub(remainingBytesToSkip, advanceDist);
    360348    incStackVar(iBuilder, sOffset, advanceDist);
    361     setScalarField("BytesToSkip", remainingBytesToSkip);
    362 
    363     generateBoundaryDetection(State::SKIPPING_BYTES, exit_block);
     349    iBuilder->setScalarField("BytesToSkip", remainingBytesToSkip);
     350
     351    generateBoundaryDetection(iBuilder, State::SKIPPING_BYTES, exit_block);
    364352    // Falls through.
    365353}
    366354
    367355
    368 void LZ4IndexDecoderKernel::generateAtBlockSize(BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
     356void LZ4IndexDecoderKernel::generateAtBlockSize(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
    369357    iBuilder->CreateBr(bb);
    370358    iBuilder->SetInsertPoint(bb);
     
    378366
    379367    // A do-while loop.
    380     BasicBlock * loopBody = CreateBasicBlock("blocksize_loop_body");
    381     BasicBlock * loopExit = CreateBasicBlock("blocksize_loop_exit");
     368    BasicBlock * loopBody = iBuilder->CreateBasicBlock("blocksize_loop_body");
     369    BasicBlock * loopExit = iBuilder->CreateBasicBlock("blocksize_loop_exit");
    382370    iBuilder->CreateBr(loopBody);
    383371
    384372    iBuilder->SetInsertPoint(loopBody);
    385     Value * byte = loadRawByte();
     373    Value * byte = loadRawByte(iBuilder);
    386374    Value * newTempLength = iBuilder->CreateAdd(
    387375            iBuilder->CreateShl(iBuilder->CreateLoad(sTempLength), iBuilder->getInt32(8)),
     
    401389
    402390    iBuilder->SetInsertPoint(loopExit);
    403     BasicBlock * blockSizeCompleted_then = CreateBasicBlock("blocksize_completed_then");
    404     BasicBlock * blockSizeCompleted_cont = CreateBasicBlock("blocksize_completed_cont");
     391    BasicBlock * blockSizeCompleted_then = iBuilder->CreateBasicBlock("blocksize_completed_then");
     392    BasicBlock * blockSizeCompleted_cont = iBuilder->CreateBasicBlock("blocksize_completed_cont");
    405393    iBuilder->CreateLikelyCondBr(
    406394            iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
     
    413401    Value * blockSize = generateBitswap(iBuilder, iBuilder->CreateLoad(sTempLength));
    414402    Value * currentPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
    415     setScalarField("LZ4BlockStart", currentPos);
    416     setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, blockSize));
     403    iBuilder->setScalarField("LZ4BlockStart", currentPos);
     404    iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, blockSize));
    417405    printRTDebugInt("blockSize", blockSize);
    418406
    419     BasicBlock * uncompressedBlock_then = CreateBasicBlock("uncompressed_block_then");
    420     BasicBlock * uncompressedBlock_else = CreateBasicBlock("uncompressed_block_cont");
     407    BasicBlock * uncompressedBlock_then = iBuilder->CreateBasicBlock("uncompressed_block_then");
     408    BasicBlock * uncompressedBlock_else = iBuilder->CreateBasicBlock("uncompressed_block_cont");
    421409    iBuilder->CreateUnlikelyCondBr(
    422410            iBuilder->CreateTrunc(
     
    430418    iBuilder->SetInsertPoint(uncompressedBlock_then);
    431419    Value * realBlockSize = iBuilder->CreateXor(blockSize, iBuilder->getInt32(1L << 31));
    432     setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, realBlockSize));
    433     setScalarField("BytesToSkip", realBlockSize);
    434     setScalarField("LiteralStart", currentPos);
    435     setScalarField("LiteralLength", realBlockSize);
     420    iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, realBlockSize));
     421    iBuilder->setScalarField("BytesToSkip", realBlockSize);
     422    iBuilder->setScalarField("LiteralStart", currentPos);
     423    iBuilder->setScalarField("LiteralLength", realBlockSize);
    436424    // No need to set MatchLength/MatchOffset to 0, nor to produce output,
    437425    // because %atBlockChecksum will do so as the last sequence.
     
    453441    // We could be at the boundary no matter the block size is completed or not.
    454442    iBuilder->SetInsertPoint(blockSizeCompleted_cont);
    455     generateBoundaryDetection(State::AT_BLOCK_SIZE, exit_block);
     443    generateBoundaryDetection(iBuilder, State::AT_BLOCK_SIZE, exit_block);
    456444    // Falls through to %at_token.
    457445}
    458446
    459447
    460 void LZ4IndexDecoderKernel::generateAtToken(BasicBlock * bb, BasicBlock * exit_block) {
     448void LZ4IndexDecoderKernel::generateAtToken(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    461449    iBuilder->CreateBr(bb);
    462450    iBuilder->SetInsertPoint(bb);
    463451    printRTDebugMsg("reading token");
    464452
    465     Value * token = loadRawByte();
     453    Value * token = loadRawByte(iBuilder);
    466454    Value * literalLen = iBuilder->CreateZExt(
    467455        iBuilder->CreateLShr(token, iBuilder->getInt8(4)),
     
    474462    incStackVar(iBuilder, sOffset);
    475463    // Prepare extender word for scanning.
    476     loadCurrentExtender();
    477     setExtenderUntilOffset();
     464    loadCurrentExtender(iBuilder);
     465    setExtenderUntilOffset(iBuilder);
    478466    // Store the (partial) match length to be extended later.
    479     setScalarField("MatchLength", matchLen);
     467    iBuilder->setScalarField("MatchLength", matchLen);
    480468    // Use tempLength to accumulate extended lengths (until at_literals).
    481469    iBuilder->CreateStore(literalLen, sTempLength);
    482470    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH), sState);
    483471
    484     generateBoundaryDetection(State::AT_TOKEN, exit_block);
     472    generateBoundaryDetection(iBuilder, State::AT_TOKEN, exit_block);
    485473    // Falls through to %extending_literal_length.
    486474}
    487475
    488476
    489 void LZ4IndexDecoderKernel::generateExtendingLiteralLen(BasicBlock * bb, BasicBlock * exit_block) {
     477void LZ4IndexDecoderKernel::generateExtendingLiteralLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    490478    iBuilder->CreateBr(bb);
    491479    iBuilder->SetInsertPoint(bb);
    492480    printRTDebugMsg("extending literal len");
    493481
    494     Value * wordOffset = getWordOffset();
    495     Value * blockOffset = getWordStartOffset();
     482    Value * wordOffset = getWordOffset(iBuilder);
     483    Value * blockOffset = getWordStartOffset(iBuilder);
    496484    Value * literalLen = iBuilder->CreateLoad(sTempLength);
    497485    Value * literalExtEnd = iBuilder->CreateTrunc(
     
    512500    Value * lastByte = iBuilder->CreateSelect(literalExtReachBoundary,
    513501            iBuilder->getInt8(0),
    514             loadRawByte(iBuilder->CreateAdd(blockOffset, loadOffset)));
     502            loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
    515503    Value * literalLenExted = iBuilder->CreateICmpUGE(literalLen, iBuilder->getInt32(0xf));
    516504    literalLen = iBuilder->CreateSelect(literalLenExted,
     
    540528    iBuilder->CreateStore(newState, sState);
    541529
    542     generateBoundaryDetection(State::EXTENDING_LITERAL_LENGTH, exit_block, true);
    543     BasicBlock * cont_block = CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
     530    generateBoundaryDetection(iBuilder, State::EXTENDING_LITERAL_LENGTH, exit_block, true);
     531    BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
    544532    // Insert point is still in wordBoundary block now.
    545533    // See if there are still more extenders.
     
    551539
    552540
    553 void LZ4IndexDecoderKernel::generateAtLiterals(BasicBlock * bb) {
     541void LZ4IndexDecoderKernel::generateAtLiterals(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb) {
    554542    iBuilder->CreateBr(bb);
    555543    iBuilder->SetInsertPoint(bb);
    556544
    557     setScalarField("LiteralStart", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)));
    558     setScalarField("LiteralLength", iBuilder->CreateLoad(sTempLength));
    559     setScalarField("BytesToSkip", iBuilder->CreateLoad(sTempLength));
     545    iBuilder->setScalarField("LiteralStart", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)));
     546    iBuilder->setScalarField("LiteralLength", iBuilder->CreateLoad(sTempLength));
     547    iBuilder->setScalarField("BytesToSkip", iBuilder->CreateLoad(sTempLength));
    560548    iBuilder->CreateStore(iBuilder->getInt8(State::AT_FIRST_OFFSET), sState);
    561549
     
    565553
    566554
    567 void LZ4IndexDecoderKernel::generateAtFirstOffset(BasicBlock * bb, BasicBlock * exit_block) {
     555void LZ4IndexDecoderKernel::generateAtFirstOffset(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    568556    iBuilder->SetInsertPoint(bb);
    569557    printRTDebugMsg("reading first offset");
    570558
    571     Value * byte = iBuilder->CreateZExt(loadRawByte(), iBuilder->getInt32Ty());
     559    Value * byte = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
    572560    // Use tempLength to store partial offset.
    573561    iBuilder->CreateStore(byte, sTempLength);
     
    575563    iBuilder->CreateStore(iBuilder->getInt8(State::AT_SECOND_OFFSET), sState);
    576564
    577     generateBoundaryDetection(State::AT_FIRST_OFFSET, exit_block);
     565    generateBoundaryDetection(iBuilder, State::AT_FIRST_OFFSET, exit_block);
    578566    // Falls through to %at_second_offset.
    579567}
    580568
    581569
    582 void LZ4IndexDecoderKernel::generateAtSecondOffset(BasicBlock * bb, BasicBlock * exit_block) {
     570void LZ4IndexDecoderKernel::generateAtSecondOffset(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    583571    iBuilder->CreateBr(bb);
    584572    iBuilder->SetInsertPoint(bb);
     
    586574
    587575    Value * byte1 = iBuilder->CreateLoad(sTempLength);
    588     Value * byte2 = iBuilder->CreateZExt(loadRawByte(), iBuilder->getInt32Ty());
     576    Value * byte2 = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
    589577    Value * offset = iBuilder->CreateAdd(
    590578            iBuilder->CreateShl(byte2, iBuilder->getInt32(8)),
    591579            byte1
    592580            );
    593     setScalarField("MatchOffset", offset);
     581    iBuilder->setScalarField("MatchOffset", offset);
    594582    incStackVar(iBuilder, sOffset);
    595583    // Prepare extender word and tempLength for extending.
    596     loadCurrentExtender();
    597     setExtenderUntilOffset();
    598     iBuilder->CreateStore(getScalarField("MatchLength"), sTempLength);
     584    loadCurrentExtender(iBuilder);
     585    setExtenderUntilOffset(iBuilder);
     586    iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), sTempLength);
    599587    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_MATCH_LENGTH), sState);
    600588
    601     generateBoundaryDetection(State::AT_SECOND_OFFSET, exit_block);
     589    generateBoundaryDetection(iBuilder, State::AT_SECOND_OFFSET, exit_block);
    602590    // Falls through to %extending_match_length.
    603591}
    604592
    605593
    606 void LZ4IndexDecoderKernel::generateExtendingMatchLen(BasicBlock * bb, BasicBlock * exit_block) {
     594void LZ4IndexDecoderKernel::generateExtendingMatchLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    607595    iBuilder->CreateBr(bb);
    608596    iBuilder->SetInsertPoint(bb);
    609597    printRTDebugMsg("extending match length");
    610598    printGlobalPos();
    611     printRTDebugInt("rawbyte", loadRawByte());
     599    printRTDebugInt("rawbyte", loadRawByte(iBuilder));
    612600    printRTDebugInt("extword", iBuilder->CreateLoad(sExtender));
    613601
    614     Value * wordOffset = getWordOffset();
    615     Value * blockOffset = getWordStartOffset();
     602    Value * wordOffset = getWordOffset(iBuilder);
     603    Value * blockOffset = getWordStartOffset(iBuilder);
    616604    Value * matchLen = iBuilder->CreateLoad(sTempLength);
    617605    Value * matchExtEnd = iBuilder->CreateTrunc(
     
    633621    Value * lastByte = iBuilder->CreateSelect(matchExtReachBoundary,
    634622            iBuilder->getInt8(0),
    635             loadRawByte(iBuilder->CreateAdd(blockOffset, loadOffset)));
     623            loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
    636624    Value * matchLenExted = iBuilder->CreateICmpUGE(matchLen, iBuilder->getInt32(0xf));
    637625    matchLen = iBuilder->CreateSelect(matchLenExted,
     
    657645
    658646    Value * unfinished = iBuilder->CreateAnd(matchExtReachBoundary, matchLenExted);
    659     BasicBlock * output_then = CreateBasicBlock("output_then");
    660     BasicBlock * output_cont = CreateBasicBlock("output_cont");
     647    BasicBlock * output_then = iBuilder->CreateBasicBlock("output_then");
     648    BasicBlock * output_cont = iBuilder->CreateBasicBlock("output_cont");
    661649    iBuilder->CreateLikelyCondBr(
    662650            iBuilder->CreateNot(unfinished),
     
    666654    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
    667655    matchLen = iBuilder->CreateAdd(matchLen, iBuilder->getInt32(4));    // Add the constant at the end.
    668     setScalarField("MatchLength", matchLen);
    669     generateProduceOutput();
     656    iBuilder->setScalarField("MatchLength", matchLen);
     657    generateProduceOutput(iBuilder);
    670658    iBuilder->CreateBr(output_cont);
    671659
    672660    iBuilder->SetInsertPoint(output_cont);
    673     generateBoundaryDetection(State::EXTENDING_MATCH_LENGTH, exit_block, true);
    674     BasicBlock * cont_block = CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
     661    generateBoundaryDetection(iBuilder, State::EXTENDING_MATCH_LENGTH, exit_block, true);
     662    BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
    675663    // Insert point is still in wordBoundary block now.
    676664    // See if there are still more extenders.
     
    681669
    682670
    683 void LZ4IndexDecoderKernel::generateAtBlockChecksum(BasicBlock * bb, BasicBlock * skippingBytes) {
     671void LZ4IndexDecoderKernel::generateAtBlockChecksum(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * skippingBytes) {
    684672    // No branch here as we have made a conditional branch outside.
    685673    iBuilder->SetInsertPoint(bb);
     
    687675
    688676    // Produce the partial output (fill matchIndexes with 0).
    689     setScalarField("MatchOffset", iBuilder->getInt32(0));
    690     setScalarField("MatchLength", iBuilder->getInt32(0));
    691     generateProduceOutput();
    692 
    693     BasicBlock * hasChecksum_then = CreateBasicBlock("has_checksum_then");
    694     BasicBlock * hasChecksum_cont = CreateBasicBlock("has_checksum_cont");
     677    iBuilder->setScalarField("MatchOffset", iBuilder->getInt32(0));
     678    iBuilder->setScalarField("MatchLength", iBuilder->getInt32(0));
     679    generateProduceOutput(iBuilder);
     680
     681    BasicBlock * hasChecksum_then = iBuilder->CreateBasicBlock("has_checksum_then");
     682    BasicBlock * hasChecksum_cont = iBuilder->CreateBasicBlock("has_checksum_cont");
    695683
    696684    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_SIZE), sState);
    697     iBuilder->CreateCondBr(getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
     685    iBuilder->CreateCondBr(iBuilder->getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
    698686
    699687    iBuilder->SetInsertPoint(hasChecksum_then);
    700     setScalarField("BytesToSkip", iBuilder->getInt32(4));
     688    iBuilder->setScalarField("BytesToSkip", iBuilder->getInt32(4));
    701689    iBuilder->CreateBr(skippingBytes);
    702690    // Boundary detection will be done in skipping_bytes.
  • icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.h

    r5436 r5440  
    2626    LZ4IndexDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    2727protected:
    28     void generateDoBlockMethod() override;
     28    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2929private:
    30     bool useIndirectBr() const override;
    3130
    3231    enum State : unsigned char {
     
    6867
    6968    // Helper methods.
    70     llvm::Value * getWordOffset();
    71     llvm::Value * getWordStartOffset();
    72     llvm::Value * loadRawByte(llvm::Value * offset);
    73     void setExtenderUntilOffset();
    74     void loadCurrentExtender();
     69    llvm::Value * getWordOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     70    llvm::Value * getWordStartOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     71    llvm::Value * loadRawByte(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * offset = nullptr);
     72    void setExtenderUntilOffset(const std::unique_ptr<KernelBuilder> & iBuilder);
     73    void loadCurrentExtender(const std::unique_ptr<KernelBuilder> & iBuilder);
    7574
    76     void generateProduceOutput();
    77     void generateBoundaryDetection(State state, llvm::BasicBlock * exit_block, bool updateExtenderWord);
     75    void generateProduceOutput(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     76    void generateBoundaryDetection(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, State state, llvm::BasicBlock * exit_block, bool updateExtenderWord = false);
    7877    // Generate basic blocks for each state.
    79     void generateSkippingBytes(llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
    80     void generateAtBlockSize(llvm::BasicBlock * bb, llvm::BasicBlock * skippingBytes, llvm::BasicBlock * exit_block);
    81     void generateAtToken(llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
    82     void generateExtendingLiteralLen(llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
    83     void generateAtLiterals(llvm::BasicBlock * bb);
    84     void generateAtFirstOffset(llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
    85     void generateAtSecondOffset(llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
    86     void generateExtendingMatchLen(llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
    87     void generateAtBlockChecksum(llvm::BasicBlock * bb, llvm::BasicBlock * skippingBytes);
     78    void generateSkippingBytes(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
     79    void generateAtBlockSize(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * bb, llvm::BasicBlock * skippingBytes, llvm::BasicBlock * exit_block);
     80    void generateAtToken(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
     81    void generateExtendingLiteralLen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
     82    void generateAtLiterals(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * bb);
     83    void generateAtFirstOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
     84    void generateAtSecondOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
     85    void generateExtendingMatchLen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * bb, llvm::BasicBlock * exit_block);
     86    void generateAtBlockChecksum(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::BasicBlock * bb, llvm::BasicBlock * skippingBytes);
    8887};
    8988
  • icGREP/icgrep-devel/icgrep/kernels/match_count.cpp

    r5436 r5440  
    1616    }
    1717
    18 void MatchCount::generateDoBlockMethod() {
     18void MatchCount::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    1919
    2020    const unsigned counterSize = iBuilder->getSizeTy()->getBitWidth();
    21     Value * to_count = loadInputStreamBlock("matches", iBuilder->getInt32(0));
    22     Value * count = getScalarField("matchedLineCount");
     21    Value * to_count = iBuilder->loadInputStreamBlock("matches", iBuilder->getInt32(0));
     22    Value * count = iBuilder->getScalarField("matchedLineCount");
    2323   
    2424    Value * value = nullptr;
     
    3535    }
    3636    value = iBuilder->CreateAdd(value, count);
    37     setScalarField("matchedLineCount", value);
     37    iBuilder->setScalarField("matchedLineCount", value);
    3838}
    3939
  • icGREP/icgrep-devel/icgrep/kernels/match_count.h

    r5436 r5440  
    1212
    1313class MatchCount : public BlockOrientedKernel {
    14 public:
    15    
    16     MatchCount(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    17    
    18 protected:
    19    
    20     void generateDoBlockMethod() override;
    21    
     14public:   
     15    MatchCount(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);   
     16protected:   
     17    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2218};
    2319
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5436 r5440  
    1515namespace kernel{
    1616       
    17 void p2s_step(IDISA::IDISA_Builder * const iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
     17void p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
    1818    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
    1919    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
     
    2222}
    2323
    24 inline void p2s(IDISA::IDISA_Builder * const iBuilder, Value * p[], Value * s[]) {
     24inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
    2525    Value * bit00004444[2];
    2626    Value * bit22226666[2];
     
    4242}
    4343               
    44 void P2SKernel::generateDoBlockMethod() {
     44void P2SKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    4545    Value * p_bitblock[8];
    4646    for (unsigned i = 0; i < 8; i++) {
    47         p_bitblock[i] = loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
     47        p_bitblock[i] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
    4848    }
    4949    Value * s_bytepack[8];
    5050    p2s(iBuilder, p_bitblock, s_bytepack);
    5151    for (unsigned j = 0; j < 8; ++j) {
    52         storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), s_bytepack[j]);
     52        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), s_bytepack[j]);
    5353    }
    5454}
    5555
    56 void P2SKernelWithCompressedOutput::generateDoBlockMethod() {
     56void P2SKernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    5757    IntegerType * i32 = iBuilder->getInt32Ty();
    5858    PointerType * bitBlockPtrTy = PointerType::get(iBuilder->getBitBlockType(), 0);
     
    6060    Value * basisBits[8];
    6161    for (unsigned i = 0; i < 8; i++) {
    62         basisBits[i] = loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
     62        basisBits[i] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
    6363    }
    6464    Value * bytePack[8];
     
    6666
    6767    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
    68     Value * delCountBlock_ptr = getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
     68    Value * delCountBlock_ptr = iBuilder->getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
    6969    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
    7070
    71     Value * output_ptr = getOutputStreamBlockPtr("byteStream", iBuilder->getInt32(0));
     71    Value * output_ptr = iBuilder->getOutputStreamBlockPtr("byteStream", iBuilder->getInt32(0));
    7272    output_ptr = iBuilder->CreatePointerCast(output_ptr, iBuilder->getInt8PtrTy());
    7373    Value * offset = iBuilder->getInt32(0);
     
    7777    }
    7878
    79     Value * unitsGenerated = getProducedItemCount("byteStream"); // units generated to buffer
     79    Value * unitsGenerated = iBuilder->getProducedItemCount("byteStream"); // units generated to buffer
    8080    unitsGenerated = iBuilder->CreateAdd(unitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    81     setProducedItemCount("byteStream", unitsGenerated);
     81    iBuilder->setProducedItemCount("byteStream", unitsGenerated);
    8282}
    8383
    84 void P2S16Kernel::generateDoBlockMethod() {
     84void P2S16Kernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    8585    Value * hi_input[8];
    8686    for (unsigned j = 0; j < 8; ++j) {
    87         hi_input[j] = loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
     87        hi_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
    8888    }
    8989    Value * hi_bytes[8];
     
    9191    Value * lo_input[8];
    9292    for (unsigned j = 0; j < 8; ++j) {
    93         lo_input[j] = loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
     93        lo_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
    9494    }
    9595    Value * lo_bytes[8];
     
    9898        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
    9999        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    100         storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j), merge0);
    101         storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j + 1), merge1);
     100        iBuilder->storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j), merge0);
     101        iBuilder->storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j + 1), merge1);
    102102    }
    103103}
    104104       
    105 void P2S16KernelWithCompressedOutput::generateDoBlockMethod() {
     105void P2S16KernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    106106    IntegerType * i32Ty = iBuilder->getInt32Ty();
    107107    PointerType * int16PtrTy = iBuilder->getInt16Ty()->getPointerTo();
     
    111111    Value * hi_input[8];
    112112    for (unsigned j = 0; j < 8; ++j) {
    113         hi_input[j] = loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
     113        hi_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
    114114    }
    115115    Value * hi_bytes[8];
     
    118118    Value * lo_input[8];
    119119    for (unsigned j = 0; j < 8; ++j) {
    120         lo_input[j] = loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
     120        lo_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
    121121    }
    122122    Value * lo_bytes[8];
    123123    p2s(iBuilder, lo_input, lo_bytes);
    124124
    125     Value * delCountBlock_ptr = getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
     125    Value * delCountBlock_ptr = iBuilder->getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
    126126    Value * unit_counts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
    127127
    128128
    129     Value * u16_output_ptr = getOutputStreamBlockPtr("i16Stream", iBuilder->getInt32(0));
     129    Value * u16_output_ptr = iBuilder->getOutputStreamBlockPtr("i16Stream", iBuilder->getInt32(0));
    130130    u16_output_ptr = iBuilder->CreatePointerCast(u16_output_ptr, int16PtrTy);
    131     Value * i16UnitsGenerated = getProducedItemCount("i16Stream"); // units generated to buffer
     131    Value * i16UnitsGenerated = iBuilder->getProducedItemCount("i16Stream"); // units generated to buffer
    132132    u16_output_ptr = iBuilder->CreateGEP(u16_output_ptr, iBuilder->CreateURem(i16UnitsGenerated, stride));
    133133
     
    144144    }   
    145145    Value * i16UnitsFinal = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    146     setProducedItemCount("i16Stream", i16UnitsFinal);
     146    iBuilder->setProducedItemCount("i16Stream", i16UnitsFinal);
    147147}
    148148
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r5436 r5440  
    1818    bool moduleIDisSignature() const override { return true; }
    1919private:
    20     void generateDoBlockMethod() override;
     20    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2121};
    2222
     
    2727    bool moduleIDisSignature() const override { return true; }
    2828private:
    29     void generateDoBlockMethod() override;
     29    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    3030};
    3131
     
    3636    bool moduleIDisSignature() const override { return true; }
    3737private:
    38     void generateDoBlockMethod() override;
     38    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    3939};
    4040   
     
    4545    bool moduleIDisSignature() const override { return true; }
    4646private:
    47     void generateDoBlockMethod() override;
     47    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    4848};
    4949   
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5436 r5440  
    3939// a continuous buffer for the full segment (number of blocks).
    4040
    41 void expand3_4Kernel::generateDoSegmentMethod() {
     41void expand3_4Kernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    4242
    4343    BasicBlock * expand2_3entry = iBuilder->GetInsertBlock();
    44     BasicBlock * expand_3_4_loop = CreateBasicBlock("expand_3_4_loop");
    45     BasicBlock * expand3_4_loop_exit = CreateBasicBlock("expand3_4_loop_exit");
    46     BasicBlock * finalStep1 = CreateBasicBlock("finalStep1");
    47     BasicBlock * finalStep2 = CreateBasicBlock("finalStep2");
    48     BasicBlock * step2load = CreateBasicBlock("step2load");
    49     BasicBlock * step2store = CreateBasicBlock("step2store");
    50     BasicBlock * finalStep3 = CreateBasicBlock("finalStep3");
    51     BasicBlock * step3load = CreateBasicBlock("step3load");
    52     BasicBlock * step3store = CreateBasicBlock("step3store");
    53     BasicBlock * step3store2 = CreateBasicBlock("step3store2");
    54     BasicBlock * itemsDone = CreateBasicBlock("itemsDone");
    55     BasicBlock * expand3_4_final = CreateBasicBlock("expand3_4_final");
    56     BasicBlock * expand3_4_exit = CreateBasicBlock("expand3_4_exit");
     44    BasicBlock * expand_3_4_loop = iBuilder->CreateBasicBlock("expand_3_4_loop");
     45    BasicBlock * expand3_4_loop_exit = iBuilder->CreateBasicBlock("expand3_4_loop_exit");
     46    BasicBlock * finalStep1 = iBuilder->CreateBasicBlock("finalStep1");
     47    BasicBlock * finalStep2 = iBuilder->CreateBasicBlock("finalStep2");
     48    BasicBlock * step2load = iBuilder->CreateBasicBlock("step2load");
     49    BasicBlock * step2store = iBuilder->CreateBasicBlock("step2store");
     50    BasicBlock * finalStep3 = iBuilder->CreateBasicBlock("finalStep3");
     51    BasicBlock * step3load = iBuilder->CreateBasicBlock("step3load");
     52    BasicBlock * step3store = iBuilder->CreateBasicBlock("step3store");
     53    BasicBlock * step3store2 = iBuilder->CreateBasicBlock("step3store2");
     54    BasicBlock * itemsDone = iBuilder->CreateBasicBlock("itemsDone");
     55    BasicBlock * expand3_4_final = iBuilder->CreateBasicBlock("expand3_4_final");
     56    BasicBlock * expand3_4_exit = iBuilder->CreateBasicBlock("expand3_4_exit");
    5757   
    5858    // Determine the required shufflevector constants.
     
    8484    const unsigned packAlign = iBuilder->getBitBlockWidth()/8;
    8585
    86     Value * processed = getProcessedItemCount("sourceStream");
    87     Value * available = getAvailableItemCount("sourceStream");
     86    Value * processed = iBuilder->getProcessedItemCount("sourceStream");
     87    Value * available = iBuilder->getAvailableItemCount("sourceStream");
    8888    Value * itemsAvail = iBuilder->CreateSub(available, processed);
    8989   
     
    9898
    9999    // A block is made up of 8 packs.  Get the pointer to the first pack (changes the type of the pointer only).
    100     Value * sourcePackPtr = getInputStreamPackPtr("sourceStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
    101     Value * outputPackPtr = getOutputStreamPackPtr("expandedStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
     100    Value * sourcePackPtr = iBuilder->getInputStreamPackPtr("sourceStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
     101    Value * outputPackPtr = iBuilder->getOutputStreamPackPtr("expandedStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
    102102
    103103    Value * hasFullLoop = iBuilder->CreateICmpUGE(loopItemsToDo, triplePackSize);
     
    156156    // Update the processed items count based on the loopItemsToDo value.
    157157    processed = iBuilder->CreateAdd(processed, loopItemsToDo);
    158     setProcessedItemCount("sourceStream", processed);
     158    iBuilder->setProcessedItemCount("sourceStream", processed);
    159159
    160160
     
    226226    iBuilder->SetInsertPoint(itemsDone);
    227227    processed = iBuilder->CreateAdd(processed, excessItems);
    228     setProcessedItemCount("sourceStream", processed);
     228    iBuilder->setProcessedItemCount("sourceStream", processed);
    229229
    230230   
     
    244244//                                   ba    bits to move 12 positions left
    245245//    xwvuts|  nlkjzy|  barqpm|  hgfedc    Target
    246 inline Value * radix64Kernel::processPackData(llvm::Value * bytepack) const {
     246inline Value * radix64Kernel::processPackData(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * bytepack) const {
    247247
    248248    Value * step_right_6 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00C00000));
     
    272272}
    273273
    274 void radix64Kernel::generateDoBlockMethod() {
     274void radix64Kernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    275275    for (unsigned i = 0; i < 8; i++) {
    276         Value * bytepack = loadInputStreamPack("expandedStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    277         Value * radix64pack = processPackData(bytepack);
    278         storeOutputStreamPack("radix64stream", iBuilder->getInt32(0), iBuilder->getInt32(i), radix64pack);
     276        Value * bytepack = iBuilder->loadInputStreamPack("expandedStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
     277        Value * radix64pack = processPackData(iBuilder, bytepack);
     278        iBuilder->storeOutputStreamPack("radix64stream", iBuilder->getInt32(0), iBuilder->getInt32(i), radix64pack);
    279279    }
    280280}
    281281
    282 void radix64Kernel::generateFinalBlockMethod(Value * remainingBytes) {
     282void radix64Kernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * remainingBytes) {
    283283
    284284    BasicBlock * entry = iBuilder->GetInsertBlock();
    285     BasicBlock * radix64_loop = CreateBasicBlock("radix64_loop");
    286     BasicBlock * fbExit = CreateBasicBlock("fbExit");
     285    BasicBlock * radix64_loop = iBuilder->CreateBasicBlock("radix64_loop");
     286    BasicBlock * fbExit = iBuilder->CreateBasicBlock("fbExit");
    287287   
    288288    const unsigned PACK_SIZE = iBuilder->getStride()/8;
     
    298298    loopRemain->addIncoming(remainingBytes, entry);
    299299
    300     Value * bytepack = loadInputStreamPack("expandedStream", iBuilder->getInt32(0), idx);
    301     Value * radix64pack = processPackData(bytepack);
    302     storeOutputStreamPack("radix64stream", iBuilder->getInt32(0), idx, radix64pack);
     300    Value * bytepack = iBuilder->loadInputStreamPack("expandedStream", iBuilder->getInt32(0), idx);
     301    Value * radix64pack = processPackData(iBuilder, bytepack);
     302    iBuilder->storeOutputStreamPack("radix64stream", iBuilder->getInt32(0), idx, radix64pack);
    303303
    304304    Value* nextIdx = iBuilder->CreateAdd(idx, ConstantInt::get(iBuilder->getInt32Ty(), 1));
     
    314314}
    315315
    316 inline llvm::Value* base64Kernel::processPackData(llvm::Value* bytepack) const {
     316inline llvm::Value* base64Kernel::processPackData(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* bytepack) const {
    317317    Value * mask_gt_25 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(25)));
    318318    Value * mask_gt_51 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(51)));
     
    332332}
    333333
    334 void base64Kernel::generateDoBlockMethod() {
     334void base64Kernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    335335    for (unsigned i = 0; i < 8; i++) {
    336         Value * bytepack = loadInputStreamPack("radix64stream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    337         Value * base64pack = processPackData(bytepack);
    338         storeOutputStreamPack("base64stream", iBuilder->getInt32(0), iBuilder->getInt32(i), base64pack);
     336        Value * bytepack = iBuilder->loadInputStreamPack("radix64stream", iBuilder->getInt32(0), iBuilder->getInt32(i));
     337        Value * base64pack = processPackData(iBuilder, bytepack);
     338        iBuilder->storeOutputStreamPack("base64stream", iBuilder->getInt32(0), iBuilder->getInt32(i), base64pack);
    339339    }
    340340}
    341 
    342 //// Special processing for the base 64 format.   The output must always contain a multiple
    343 //// of 4 bytes.   When the number of radix 64 values is not a multiple of 4
    344 //// number of radix 64 values
    345 //void base64Kernel::generateFinalBlockMethod(Value * remainingBytes) {
    346 
    347 //    BasicBlock * entry = iBuilder->GetInsertBlock();
    348 //    BasicBlock * base64_loop = CreateBasicBlock("base64_loop");
    349 //    BasicBlock * loopExit = CreateBasicBlock("loopExit");
    350 //    BasicBlock * doPadding = CreateBasicBlock("doPadding");
    351 //    BasicBlock * doPadding2 = CreateBasicBlock("doPadding2");
    352 //    BasicBlock * fbExit = CreateBasicBlock("fbExit");
    353 
    354 //    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, iBuilder->getSize(3));
    355 //    Value * padBytes = iBuilder->CreateSub(iBuilder->getSize(4), remainMod4);
    356 //    padBytes = iBuilder->CreateAnd(padBytes, iBuilder->getSize(3));
    357 
    358 //    Constant * packSize = iBuilder->getSize(iBuilder->getStride() / 8);
    359 
    360 //    // Enter the loop only if there is at least one byte remaining to process.
    361 //    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0)), fbExit, base64_loop);
    362 
    363 //    iBuilder->SetInsertPoint(base64_loop);
    364 //    PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    365 //    PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    366 //    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
    367 //    loopRemain->addIncoming(remainingBytes, entry);
    368 //    Value * radix64streamPtr = getInputStream("radix64stream", iBuilder->getInt32(0), idx);
    369 //    Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64streamPtr);
    370 //    Value * base64pack = processPackData(bytepack);
    371 //    Value * base64streamPtr = getOutputStream("base64stream", iBuilder->getInt32(0), idx);
    372 
    373 //    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64streamPtr);
    374 //    idx->addIncoming(iBuilder->CreateAdd(idx, ConstantInt::get(iBuilder->getInt32Ty(), 1)), base64_loop);
    375 //    Value* remainAfterLoop = iBuilder->CreateSub(loopRemain, packSize);
    376 //    loopRemain->addIncoming(remainAfterLoop, base64_loop);
    377 
    378 //    Value* continueLoop = iBuilder->CreateICmpSGT(remainAfterLoop, iBuilder->getSize(0));
    379 //    iBuilder->CreateCondBr(continueLoop, base64_loop, loopExit);
    380 
    381 //    iBuilder->SetInsertPoint(loopExit);
    382 //    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(padBytes, iBuilder->getSize(0)), fbExit, doPadding);
    383 
    384 //    iBuilder->SetInsertPoint(doPadding);
    385 
    386 //    base64streamPtr = getOutputStream("base64stream", iBuilder->getInt32(0), idx);
    387 //    Value * i8streamPtr = iBuilder->CreatePointerCast(base64streamPtr, iBuilder->getInt8PtrTy());
    388 //    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8streamPtr, remainingBytes));
    389 //    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, iBuilder->getSize(3)), fbExit, doPadding2);
    390 //    iBuilder->SetInsertPoint(doPadding2);
    391 //    Value * finalPadPos = iBuilder->CreateAdd(remainingBytes, iBuilder->getSize(1));
    392 //    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8streamPtr, finalPadPos));
    393 //    iBuilder->CreateBr(fbExit);
    394 //    iBuilder->SetInsertPoint(fbExit);
    395 //    Value * produced = iBuilder->CreateAdd(getProducedItemCount("base64stream"), iBuilder->CreateAdd(remainingBytes, padBytes));
    396 //    setProducedItemCount("base64stream", produced);
    397 //}
    398341
    399342// Special processing for the base 64 format.   The output must always contain a multiple
    400343// of 4 bytes.   When the number of radix 64 values is not a multiple of 4,
    401344// the output is padded with '=' characters up to the next multiple of 4.
    402 void base64Kernel::generateFinalBlockMethod(Value * remainingBytes) {
     345void base64Kernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * remainingBytes) {
    403346
    404347    BasicBlock * entry = iBuilder->GetInsertBlock();
    405     BasicBlock * base64_loop = CreateBasicBlock("base64_loop");
    406     BasicBlock * loopExit = CreateBasicBlock("loopExit");
    407     BasicBlock * doPadding = CreateBasicBlock("doPadding");
    408     BasicBlock * doPadding2 = CreateBasicBlock("doPadding2");
    409     BasicBlock * fbExit = CreateBasicBlock("fbExit");
     348    BasicBlock * base64_loop = iBuilder->CreateBasicBlock("base64_loop");
     349    BasicBlock * loopExit = iBuilder->CreateBasicBlock("loopExit");
     350    BasicBlock * doPadding = iBuilder->CreateBasicBlock("doPadding");
     351    BasicBlock * doPadding2 = iBuilder->CreateBasicBlock("doPadding2");
     352    BasicBlock * fbExit = iBuilder->CreateBasicBlock("fbExit");
    410353
    411354    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, iBuilder->getSize(3));
     
    423366    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
    424367    loopRemain->addIncoming(remainingBytes, entry);
    425     Value * bytepack = loadInputStreamPack("radix64stream", iBuilder->getInt32(0), idx);
    426     Value * base64pack = processPackData(bytepack);
    427     storeOutputStreamPack("base64stream", iBuilder->getInt32(0), idx, base64pack);
     368    Value * bytepack = iBuilder->loadInputStreamPack("radix64stream", iBuilder->getInt32(0), idx);
     369    Value * base64pack = processPackData(iBuilder, bytepack);
     370    iBuilder->storeOutputStreamPack("base64stream", iBuilder->getInt32(0), idx, base64pack);
    428371    idx->addIncoming(iBuilder->CreateAdd(idx, ConstantInt::get(iBuilder->getInt32Ty(), 1)), base64_loop);
    429372    Value* remainAfterLoop = iBuilder->CreateSub(loopRemain, packSize);
     
    437380
    438381    iBuilder->SetInsertPoint(doPadding);
    439     Value * i8output_ptr = getOutputStreamBlockPtr("base64stream", iBuilder->getInt32(0));
     382    Value * i8output_ptr = iBuilder->getOutputStreamBlockPtr("base64stream", iBuilder->getInt32(0));
    440383    i8output_ptr = iBuilder->CreatePointerCast(i8output_ptr, iBuilder->getInt8PtrTy());
    441384    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8output_ptr, remainingBytes));
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5436 r5440  
    2525    bool moduleIDisSignature() const override { return true; }
    2626private:
    27     void generateDoSegmentMethod() override;
     27    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) override;
    2828};
    2929
     
    3434    bool moduleIDisSignature() const override { return true; }
    3535private:
    36     virtual void generateDoBlockMethod() override;
    37     virtual void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
    38     llvm::Value* processPackData(llvm::Value* packData) const;
     36    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     37    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) override;
     38    llvm::Value* processPackData(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* packData) const;
    3939};
    4040
     
    4545    bool moduleIDisSignature() const override { return true; }
    4646private:
    47     virtual void generateDoBlockMethod() override;
    48     virtual void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
    49     llvm::Value* processPackData(llvm::Value* packData) const;
     47    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     48    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) override;
     49    llvm::Value* processPackData(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* packData) const;
    5050};
    5151
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5436 r5440  
    1414const int PACK_LANES = 1;
    1515
    16 void s2p_step(IDISA::IDISA_Builder * const iBuilder, Value * s0, Value * s1, Value * hi_mask, unsigned shift, Value * &p0, Value * &p1) {
     16void s2p_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * s0, Value * s1, Value * hi_mask, unsigned shift, Value * &p0, Value * &p1) {
    1717    Value * t0 = nullptr;
    1818    Value * t1 = nullptr;
     
    3030}
    3131
    32 void s2p(IDISA::IDISA_Builder * const iBuilder, Value * input[], Value * output[]) {
     32void s2p(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[]) {
    3333    Value * bit00224466[4];
    3434    Value * bit11335577[4];
     
    5757/* Alternative transposition model, but small field width packs are problematic. */
    5858#if 0
    59 void s2p_ideal(IDISA::IDISA_Builder * const iBuilder, Value * input[], Value * output[]) {
     59void s2p_ideal(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[]) {
    6060    Value * hi_nybble[4];
    6161    Value * lo_nybble[4];
     
    8888   
    8989#if 0
    90 void generateS2P_16Kernel(Module *, IDISA::IDISA_Builder * const iBuilder, KernelBuilder * kBuilder) {
     90void generateS2P_16Kernel(const std::unique_ptr<KernelBuilder> & iBuilder, Kernel * kBuilder) {
    9191    kBuilder->addInputStream(16, "unit_pack");
    9292    for(unsigned i = 0; i < 16; i++) {
    9393            kBuilder->addOutputStream(1);
    9494    }
    95     kBuilder->prepareFunction();
    96 
    9795    Value * ptr = kBuilder->getInputStream(0);
    9896
     
    112110        iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
    113111    }
    114     kBuilder->finalize();
    115112}   
    116113#endif
    117114   
    118 void S2PKernel::generateDoBlockMethod() {
     115void S2PKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    119116    Value * bytepack[8];
    120117    for (unsigned i = 0; i < 8; i++) {
    121118        if (mAligned) {
    122             bytepack[i] = loadInputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
     119            bytepack[i] = iBuilder->loadInputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    123120        } else {
    124             Value * ptr = getInputStreamPackPtr("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
     121            Value * ptr = iBuilder->getInputStreamPackPtr("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    125122            // CreateLoad defaults to aligned here, so we need to force the alignment to 1 byte.
    126123            bytepack[i] = iBuilder->CreateAlignedLoad(ptr, 1);
     
    130127    s2p(iBuilder, bytepack, basisbits);
    131128    for (unsigned i = 0; i < 8; ++i) {
    132         storeOutputStreamBlock("basisBits", iBuilder->getInt32(i), basisbits[i]);
     129        iBuilder->storeOutputStreamBlock("basisBits", iBuilder->getInt32(i), basisbits[i]);
    133130    }
    134131}
    135132
    136 void S2PKernel::generateFinalBlockMethod(Value * remainingBytes) {
     133void S2PKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * remainingBytes) {
    137134    /* Prepare the s2p final block function:
    138135     assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
     
    140137     */
    141138   
    142     BasicBlock * finalPartialBlock = CreateBasicBlock("partial");
    143     BasicBlock * finalEmptyBlock = CreateBasicBlock("empty");
    144     BasicBlock * exitBlock = CreateBasicBlock("exit");
     139    BasicBlock * finalPartialBlock = iBuilder->CreateBasicBlock("partial");
     140    BasicBlock * finalEmptyBlock = iBuilder->CreateBasicBlock("empty");
     141    BasicBlock * exitBlock = iBuilder->CreateBasicBlock("exit");
    145142   
    146143    Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0));
    147144    iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
    148145    iBuilder->SetInsertPoint(finalPartialBlock);
    149     CreateDoBlockMethodCall();
     146    CreateDoBlockMethodCall(iBuilder);
    150147   
    151148    iBuilder->CreateBr(exitBlock);
     
    154151
    155152    for (unsigned i = 0; i < 8; ++i) {
    156         storeOutputStreamBlock("basisBits", iBuilder->getInt32(i), Constant::getNullValue(iBuilder->getBitBlockType()));
     153        iBuilder->storeOutputStreamBlock("basisBits", iBuilder->getInt32(i), Constant::getNullValue(iBuilder->getBitBlockType()));
    157154    }
    158155
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r5436 r5440  
    1818    bool moduleIDisSignature() const override { return true; }
    1919protected:
    20     void generateDoBlockMethod() override;
    21     void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
     20    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     21    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) override;
    2222private:
    2323    bool mAligned;
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5436 r5440  
    3131}
    3232
    33 void ScanMatchKernel::generateDoBlockMethod() {
     33void ScanMatchKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
    3434
    3535    Module * const m = iBuilder->getModule();
    3636    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
    37     BasicBlock * const scanWordIteration = CreateBasicBlock("ScanWordIteration");
    38     BasicBlock * const matches_test_block = CreateBasicBlock("matches_test_block");
    39     BasicBlock * const processMatchesEntry = CreateBasicBlock("process_matches_loop");
    40     BasicBlock * const prior_breaks_block = CreateBasicBlock("prior_breaks_block");
    41     BasicBlock * const loop_final_block = CreateBasicBlock("loop_final_block");
    42     BasicBlock * const processMatchesExit = CreateBasicBlock("matches_done_block");
    43     BasicBlock * const remaining_breaks_block = CreateBasicBlock("remaining_breaks_block");
    44     BasicBlock * const return_block = CreateBasicBlock("return_block");
    45     BasicBlock * const scanWordExit = CreateBasicBlock("ScanWordExit");
     37    BasicBlock * const scanWordIteration = iBuilder->CreateBasicBlock("ScanWordIteration");
     38    BasicBlock * const matches_test_block = iBuilder->CreateBasicBlock("matches_test_block");
     39    BasicBlock * const processMatchesEntry = iBuilder->CreateBasicBlock("process_matches_loop");
     40    BasicBlock * const prior_breaks_block = iBuilder->CreateBasicBlock("prior_breaks_block");
     41    BasicBlock * const loop_final_block = iBuilder->CreateBasicBlock("loop_final_block");
     42    BasicBlock * const processMatchesExit = iBuilder->CreateBasicBlock("matches_done_block");
     43    BasicBlock * const remaining_breaks_block = iBuilder->CreateBasicBlock("remaining_breaks_block");
     44    BasicBlock * const return_block = iBuilder->CreateBasicBlock("return_block");
     45    BasicBlock * const scanWordExit = iBuilder->CreateBasicBlock("ScanWordExit");
    4646    IntegerType * const sizeTy = iBuilder->getSizeTy();
    4747    const unsigned fieldCount = iBuilder->getBitBlockWidth() / sizeTy->getBitWidth();
    4848    VectorType * const scanwordVectorType =  VectorType::get(sizeTy, fieldCount);
    49     Value * const blockNo = getScalarField("BlockNo");
     49    Value * const blockNo = iBuilder->getScalarField("BlockNo");
    5050    Value * const scanwordPos = iBuilder->CreateShl(blockNo, floor_log2(iBuilder->getBitBlockWidth()));
    51     Value * const lastRecordStart = getProcessedItemCount("InputStream");
    52     Value * const lastRecordNum = getScalarField("LineNum");
    53 
    54     Value * const matches = iBuilder->CreateBitCast(loadInputStreamBlock("matchResult", iBuilder->getInt32(0)), scanwordVectorType);
    55     Value * const linebreaks = iBuilder->CreateBitCast(loadInputStreamBlock("lineBreak", iBuilder->getInt32(0)), scanwordVectorType);
     51    Value * const lastRecordStart = iBuilder->getProcessedItemCount("InputStream");
     52    Value * const lastRecordNum = iBuilder->getScalarField("LineNum");
     53
     54    Value * const matches = iBuilder->CreateBitCast(iBuilder->loadInputStreamBlock("matchResult", iBuilder->getInt32(0)), scanwordVectorType);
     55    Value * const linebreaks = iBuilder->CreateBitCast(iBuilder->loadInputStreamBlock("lineBreak", iBuilder->getInt32(0)), scanwordVectorType);
    5656
    5757    iBuilder->CreateBr(scanWordIteration);
     
    105105                // PRIOR_BREAKS_BLOCK
    106106                // If there are prior breaks, we count them and compute the record start position.
    107                 iBuilder->SetInsertPoint(prior_breaks_block);               
     107                iBuilder->SetInsertPoint(prior_breaks_block);
    108108                Value * matchedRecordNum = iBuilder->CreateAdd(iBuilder->CreatePopcount(prior_breaks), phiRecordNum);
    109109                Value * reverseDistance = iBuilder->CreateCountReverseZeroes(prior_breaks);
     
    132132            Value * const mrs = iBuilder->CreateZExtOrTrunc(matchRecordStart, (++args)->getType());
    133133            Value * const mre = iBuilder->CreateZExtOrTrunc(matchRecordEnd, (++args)->getType());
    134             Value * const inputStream = getRawInputPointer("InputStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
     134            Value * const inputStream = iBuilder->getRawInputPointer("InputStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
    135135            Value * const is = iBuilder->CreatePointerCast(inputStream, (++args)->getType());
    136136            if (mGrepType == GrepType::Normal) {
    137                 Value * const sz = iBuilder->CreateZExtOrTrunc(getBufferedSize("InputStream"), (++args)->getType());
    138                 Value * const fi = iBuilder->CreateZExtOrTrunc(getScalarField("FileIdx"), (++args)->getType());
     137                Value * const sz = iBuilder->CreateZExtOrTrunc(iBuilder->getBufferedSize("InputStream"), (++args)->getType());
     138                Value * const fi = iBuilder->CreateZExtOrTrunc(iBuilder->getScalarField("FileIdx"), (++args)->getType());
    139139                iBuilder->CreateCall(matcher, {mrn, mrs, mre, is, sz, fi});
    140140            } else {
     
    184184
    185185    iBuilder->SetInsertPoint(scanWordExit);
    186     setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, ConstantInt::get(blockNo->getType(), 1)));
    187     setScalarField("LineNum", phiFinalRecordNum);
    188     setProcessedItemCount("InputStream", phiFinalRecordStart);
     186    iBuilder->setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, ConstantInt::get(blockNo->getType(), 1)));
     187    iBuilder->setScalarField("LineNum", phiFinalRecordNum);
     188    iBuilder->setProcessedItemCount("InputStream", phiFinalRecordStart);
    189189}
    190190
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5436 r5440  
    2020    bool moduleIDisSignature() const override { return true; }
    2121protected:
    22     void generateDoBlockMethod() override;
     22    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2323private:
    2424    const GrepType      mGrepType;
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp

    r5436 r5440  
    2727/// MMAP SOURCE KERNEL
    2828
    29 void MMapSourceKernel::linkExternalMethods() {
     29void MMapSourceKernel::linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    3030    mFileSizeFunction = iBuilder->LinkFunction("file_size", &file_size);
    3131}
    3232
    33 void MMapSourceKernel::generateInitializeMethod() {
    34     BasicBlock * const emptyFile = CreateBasicBlock("EmptyFile");
    35     BasicBlock * const nonEmptyFile = CreateBasicBlock("NonEmptyFile");
    36     BasicBlock * const exit = CreateBasicBlock("Exit");
     33void MMapSourceKernel::generateInitializeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     34    BasicBlock * const emptyFile = iBuilder->CreateBasicBlock("EmptyFile");
     35    BasicBlock * const nonEmptyFile = iBuilder->CreateBasicBlock("NonEmptyFile");
     36    BasicBlock * const exit = iBuilder->CreateBasicBlock("Exit");
    3737    IntegerType * const sizeTy = iBuilder->getSizeTy();
    38     Value * const fd = getScalarField("fileDescriptor");
     38    assert (iBuilder->getKernel() == this);
     39    Value * const fd = iBuilder->getScalarField("fileDescriptor");
    3940    assert (mFileSizeFunction);
    4041    Value * fileSize = iBuilder->CreateCall(mFileSizeFunction, fd);
     
    6364    size->addIncoming(fileSize, nonEmptyFile);
    6465
    65     setBaseAddress("sourceBuffer", buffer);
    66     setBufferedSize("sourceBuffer", size);
    67     setScalarField("readableBuffer", buffer);
    68     setScalarField("fileSize", fileSize);
     66    iBuilder->setBaseAddress("sourceBuffer", buffer);
     67    iBuilder->setBufferedSize("sourceBuffer", size);
     68    iBuilder->setScalarField("readableBuffer", buffer);
     69    iBuilder->setScalarField("fileSize", fileSize);
    6970    iBuilder->CreateMAdvise(buffer, fileSize, CBuilder::ADVICE_WILLNEED);
    7071
    7172}
    7273
    73 void MMapSourceKernel::generateDoSegmentMethod() {
    74 
    75     BasicBlock * dropPages = CreateBasicBlock("dropPages");
    76     BasicBlock * processSegment = CreateBasicBlock("produceData");
    77     BasicBlock * setTermination = CreateBasicBlock("setTermination");
    78     BasicBlock * mmapSourceExit = CreateBasicBlock("mmapSourceExit");
     74void MMapSourceKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     75
     76    BasicBlock * dropPages = iBuilder->CreateBasicBlock("dropPages");
     77    BasicBlock * processSegment = iBuilder->CreateBasicBlock("produceData");
     78    BasicBlock * setTermination = iBuilder->CreateBasicBlock("setTermination");
     79    BasicBlock * mmapSourceExit = iBuilder->CreateBasicBlock("mmapSourceExit");
    7980
    8081    // instruct the OS that it can safely drop any fully consumed pages
    81     Value * consumed = getConsumedItemCount("sourceBuffer");
     82    Value * consumed = iBuilder->getConsumedItemCount("sourceBuffer");
    8283    Type * const consumedTy = consumed->getType();
    8384    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
     
    9394        consumed = iBuilder->CreateSub(consumed, iBuilder->CreateURem(consumed, ConstantInt::get(consumedTy, pageSize)));
    9495    }
    95     Value * sourceBuffer = getBaseAddress("sourceBuffer");
     96    Value * sourceBuffer = iBuilder->getBaseAddress("sourceBuffer");
    9697    sourceBuffer = iBuilder->CreatePtrToInt(sourceBuffer, consumedTy);
    9798    Value * consumedBuffer = iBuilder->CreateAdd(sourceBuffer, consumed);
     
    100101
    101102
    102     Value * readableBuffer = getScalarField("readableBuffer");
     103    Value * readableBuffer = iBuilder->getScalarField("readableBuffer");
    103104    readableBuffer = iBuilder->CreatePtrToInt(readableBuffer, consumedTy);
    104105    Value * unnecessaryBytes = iBuilder->CreateSub(consumedBuffer, readableBuffer);
     
    113114    iBuilder->CreateMAdvise(iBuilder->CreateIntToPtr(readableBuffer, voidPtrTy), unnecessaryBytes, CBuilder::ADVICE_DONTNEED);
    114115    readableBuffer = iBuilder->CreateIntToPtr(iBuilder->CreateAdd(readableBuffer, unnecessaryBytes), voidPtrTy);
    115     setScalarField("readableBuffer", readableBuffer);
     116    iBuilder->setScalarField("readableBuffer", readableBuffer);
    116117    iBuilder->CreateBr(processSegment);
    117118
     
    119120    iBuilder->SetInsertPoint(processSegment);
    120121    ConstantInt * segmentItems = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
    121     Value * const fileSize = getScalarField("fileSize");
    122     Value * const produced = iBuilder->CreateAdd(getProducedItemCount("sourceBuffer"), segmentItems);
     122    Value * const fileSize = iBuilder->getScalarField("fileSize");
     123    Value * const produced = iBuilder->CreateAdd(iBuilder->getProducedItemCount("sourceBuffer"), segmentItems);
    123124    Value * const lessThanFullSegment = iBuilder->CreateICmpULT(fileSize, produced);
    124125    iBuilder->CreateUnlikelyCondBr(lessThanFullSegment, setTermination, mmapSourceExit);
    125126    iBuilder->SetInsertPoint(setTermination);
    126127
    127     setTerminationSignal();
     128    iBuilder->setTerminationSignal();
    128129    iBuilder->CreateBr(mmapSourceExit);
    129130
     
    133134    itemsRead->addIncoming(produced, processSegment);
    134135    itemsRead->addIncoming(fileSize, setTermination);
    135     setProducedItemCount("sourceBuffer", itemsRead);
    136 }
    137 
    138 void MMapSourceKernel::generateFinalizeMethod() {
    139     iBuilder->CreateMUnmap(getBaseAddress("sourceBuffer"), getBufferedSize("sourceBuffer"));
     136    iBuilder->setProducedItemCount("sourceBuffer", itemsRead);
     137}
     138
     139void MMapSourceKernel::generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     140    iBuilder->CreateMUnmap(iBuilder->getBaseAddress("sourceBuffer"), iBuilder->getBufferedSize("sourceBuffer"));
    140141}
    141142
     
    154155/// READ SOURCE KERNEL
    155156
    156 void ReadSourceKernel::generateInitializeMethod() {
     157void ReadSourceKernel::generateInitializeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    157158    ConstantInt * const bufferSize = iBuilder->getSize(64 * getpagesize());
    158159    Value * const buffer = iBuilder->CreateAlignedMalloc(bufferSize, iBuilder->getCacheAlignment());
    159     setScalarField("buffer", buffer);
    160     setScalarField("capacity", bufferSize);
    161     setBaseAddress("sourceBuffer", buffer);
    162     setBufferedSize("sourceBuffer", iBuilder->getSize(0));
    163 }
    164 
    165 void ReadSourceKernel::generateDoSegmentMethod() {
     160    iBuilder->setScalarField("buffer", buffer);
     161    iBuilder->setScalarField("capacity", bufferSize);
     162    iBuilder->setBaseAddress("sourceBuffer", buffer);
     163    iBuilder->setBufferedSize("sourceBuffer", iBuilder->getSize(0));
     164}
     165
     166void ReadSourceKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    166167
    167168    ConstantInt * const pageSize = iBuilder->getSize(getpagesize());
    168169    PointerType * const codeUnitPtrTy = IntegerType::get(iBuilder->getContext(), mCodeUnitWidth)->getPointerTo();
    169170    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
    170     BasicBlock * const exhaustedBuffer = CreateBasicBlock("ExhaustedBuffer");
    171     BasicBlock * const waitOnConsumers = CreateBasicBlock("WaitOnConsumers");
    172     BasicBlock * const readData = CreateBasicBlock("ReadData");
    173     BasicBlock * const stdInExit = CreateBasicBlock("StdInExit");
     171    BasicBlock * const exhaustedBuffer = iBuilder->CreateBasicBlock("ExhaustedBuffer");
     172    BasicBlock * const waitOnConsumers = iBuilder->CreateBasicBlock("WaitOnConsumers");
     173    BasicBlock * const readData = iBuilder->CreateBasicBlock("ReadData");
     174    BasicBlock * const stdInExit = iBuilder->CreateBasicBlock("StdInExit");
     175
     176    assert(iBuilder->getKernel() == this);
    174177
    175178    // The ReadSourceKernel begins by checking whether it needs to read another page of data
    176179    ConstantInt * const segmentSize = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
    177     Value * bufferedSize = getBufferedSize("sourceBuffer");
    178     Value * const produced = getProducedItemCount("sourceBuffer");
     180    Value * bufferedSize = iBuilder->getBufferedSize("sourceBuffer");
     181    Value * const produced = iBuilder->getProducedItemCount("sourceBuffer");
    179182    Value * unreadSize = iBuilder->CreateSub(bufferedSize, produced);
    180183    iBuilder->CreateUnlikelyCondBr(iBuilder->CreateICmpULT(unreadSize, segmentSize), exhaustedBuffer, stdInExit);
     
    194197    // If so, we can append to our existing buffer without impacting any subsequent kernel.
    195198
    196     Value * inputStream = getRawOutputPointer("sourceBuffer", iBuilder->getInt32(0), iBuilder->getInt32(0));
     199    Value * inputStream = iBuilder->getRawOutputPointer("sourceBuffer", iBuilder->getInt32(0), iBuilder->getInt32(0));
    197200    inputStream = iBuilder->CreatePointerCast(inputStream, codeUnitPtrTy);
    198201    Value * const originalPtr = iBuilder->CreateGEP(inputStream, produced);
    199     Value * const buffer = iBuilder->CreatePointerCast(getScalarField("buffer"), codeUnitPtrTy);
    200     Value * const capacity = getScalarField("capacity");
     202    Value * const buffer = iBuilder->CreatePointerCast(iBuilder->getScalarField("buffer"), codeUnitPtrTy);
     203    Value * const capacity = iBuilder->getScalarField("capacity");
    201204    Value * const canAppend = iBuilder->CreateICmpULT(iBuilder->CreateGEP(originalPtr, pageSize), iBuilder->CreateGEP(buffer, capacity));
    202205    iBuilder->CreateLikelyCondBr(canAppend, readData, waitOnConsumers);
     
    204207    // First wait on any consumers to finish processing then check how much data has been consumed.
    205208    iBuilder->SetInsertPoint(waitOnConsumers);
    206     CreateWaitForConsumers();
     209    iBuilder->CreateConsumerWait();
    207210    // Then determine how much data has been consumed and how much needs to be copied back, noting
    208211    // that our "unproduced" data must be block aligned.
    209212    const auto alignment = iBuilder->getBitBlockWidth() / 8;
    210213    Constant * const alignmentMask = ConstantExpr::getNeg(iBuilder->getSize(alignment));
    211     Value * const consumed = iBuilder->CreateAnd(getConsumedItemCount("sourceBuffer"), alignmentMask);
     214    Value * const consumed = iBuilder->CreateAnd(iBuilder->getConsumedItemCount("sourceBuffer"), alignmentMask);
    212215    Value * const remaining = iBuilder->CreateSub(bufferedSize, consumed);
    213216    Value * const unconsumedPtr = iBuilder->CreateGEP(inputStream, consumed);
    214217    Value * const consumedMajority = iBuilder->CreateICmpULT(iBuilder->CreateGEP(buffer, remaining), unconsumedPtr);
    215     BasicBlock * const copyBack = CreateBasicBlock("CopyBack");
    216     BasicBlock * const expandAndCopyBack = CreateBasicBlock("ExpandAndCopyBack");
    217     BasicBlock * const calculateLogicalAddress = CreateBasicBlock("CalculateLogicalAddress");
     218    BasicBlock * const copyBack = iBuilder->CreateBasicBlock("CopyBack");
     219    BasicBlock * const expandAndCopyBack = iBuilder->CreateBasicBlock("ExpandAndCopyBack");
     220    BasicBlock * const calculateLogicalAddress = iBuilder->CreateBasicBlock("CalculateLogicalAddress");
    218221    // Have we consumed enough data that we can safely copy back the unconsumed data without needing
    219222    // a temporary buffer? (i.e., B + remaining < L + consumed)
     
    230233    iBuilder->CreateMemCpy(expandedPtr, unconsumedPtr, remaining, alignment);
    231234    iBuilder->CreateAlignedFree(buffer);
    232     setScalarField("buffer", expandedBuffer);
    233     setScalarField("capacity", expandedCapacity);   
     235    iBuilder->setScalarField("buffer", expandedBuffer);
     236    iBuilder->setScalarField("capacity", expandedCapacity);
    234237    iBuilder->CreateBr(calculateLogicalAddress);
    235238    // Update the logical address for this buffer....
     
    240243    Value * const modifiedPtr = iBuilder->CreateGEP(baseAddress, remaining);
    241244    Value * const logicalAddress = iBuilder->CreateGEP(modifiedPtr, iBuilder->CreateNeg(produced));
    242     setBaseAddress("sourceBuffer", logicalAddress);
     245    iBuilder->setBaseAddress("sourceBuffer", logicalAddress);
    243246    iBuilder->CreateBr(readData);
    244247    // Regardless of whether we're simply appending data or had to allocate a new buffer, read a new page
     
    250253    addr->addIncoming(originalPtr, exhaustedBuffer);
    251254    addr->addIncoming(modifiedPtr, calculateLogicalAddress);
    252     Value * bytesRead = iBuilder->CreateReadCall(getScalarField("fileDescriptor"), addr, pageSize);
     255    assert(iBuilder->getKernel() == this);
     256    Value * const fd = iBuilder->getScalarField("fileDescriptor");
     257    Value * bytesRead = iBuilder->CreateReadCall(fd, addr, pageSize);
    253258    unreadSize = iBuilder->CreateAdd(unreadSize, bytesRead);
    254259    bufferedSize = iBuilder->CreateAdd(bufferedSize, bytesRead);
    255     setBufferedSize("sourceBuffer", bufferedSize);
     260    iBuilder->setBufferedSize("sourceBuffer", bufferedSize);
    256261    Value * const exhaustedInputSource = iBuilder->CreateICmpULT(unreadSize, segmentSize);
    257     BasicBlock * const setTermination = CreateBasicBlock("SetTermination");
     262    BasicBlock * const setTermination = iBuilder->CreateBasicBlock("SetTermination");
    258263    iBuilder->CreateUnlikelyCondBr(exhaustedInputSource, setTermination, stdInExit);
    259264
     
    262267    Value * const bytesToZero = iBuilder->CreateSub(segmentSize, unreadSize);
    263268    iBuilder->CreateMemZero(iBuilder->CreateGEP(addr, unreadSize), bytesToZero);
    264     setTerminationSignal();
     269    iBuilder->setTerminationSignal();
    265270    iBuilder->CreateBr(stdInExit);
    266271
     
    273278    items->addIncoming(segmentSize, readData);
    274279    items->addIncoming(unreadSize, setTermination);
    275     setProducedItemCount("sourceBuffer", iBuilder->CreateAdd(produced, items));
    276 }
    277 
    278 void ReadSourceKernel::generateFinalizeMethod() {
    279     iBuilder->CreateAlignedFree(getScalarField("buffer"));
     280    iBuilder->setProducedItemCount("sourceBuffer", iBuilder->CreateAdd(produced, items));
     281}
     282
     283void ReadSourceKernel::generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     284    iBuilder->CreateAlignedFree(iBuilder->getScalarField("buffer"));
    280285}
    281286
     
    294299/// MEMORY SOURCE KERNEL
    295300
    296 void MemorySourceKernel::generateInitializeMethod() {
    297     setBaseAddress("sourceBuffer", iBuilder->CreatePointerCast(getScalarField("fileSource"), iBuilder->getVoidPtrTy()));
    298     setBufferedSize("sourceBuffer", getScalarField("fileSize"));
    299 }
    300 
    301 void MemorySourceKernel::generateDoSegmentMethod() {
     301void MemorySourceKernel::generateInitializeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     302    iBuilder->setBaseAddress("sourceBuffer", iBuilder->CreatePointerCast(iBuilder->getScalarField("fileSource"), iBuilder->getVoidPtrTy()));
     303    iBuilder->setBufferedSize("sourceBuffer", iBuilder->getScalarField("fileSize"));
     304}
     305
     306void MemorySourceKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    302307
    303308    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    304     BasicBlock * setTermination = CreateBasicBlock("setTermination");
    305     BasicBlock * mmapSourceExit = CreateBasicBlock("sourceExit");
     309    BasicBlock * setTermination = iBuilder->CreateBasicBlock("setTermination");
     310    BasicBlock * mmapSourceExit = iBuilder->CreateBasicBlock("sourceExit");
    306311    ConstantInt * segmentItems = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
    307     Value * fileItems = getScalarField("fileSize");
     312    Value * fileItems = iBuilder->getScalarField("fileSize");
    308313    if (mCodeUnitWidth > 8) {
    309314        fileItems = iBuilder->CreateUDiv(fileItems, iBuilder->getSize(mCodeUnitWidth / 8));
    310315    }
    311     Value * produced = getProducedItemCount("sourceBuffer");
     316    Value * produced = iBuilder->getProducedItemCount("sourceBuffer");
    312317    produced = iBuilder->CreateAdd(produced, segmentItems);
    313318    Value * lessThanFullSegment = iBuilder->CreateICmpULT(fileItems, produced);
    314319    iBuilder->CreateCondBr(lessThanFullSegment, setTermination, mmapSourceExit);
    315320    iBuilder->SetInsertPoint(setTermination);
    316     setTerminationSignal();
     321    iBuilder->setTerminationSignal();
    317322    iBuilder->CreateBr(mmapSourceExit);
    318323
     
    322327    itemsRead->addIncoming(produced, entryBlock);
    323328    itemsRead->addIncoming(fileItems, setTermination);
    324     setProducedItemCount("sourceBuffer", itemsRead);
     329    iBuilder->setProducedItemCount("sourceBuffer", itemsRead);
    325330}
    326331
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.h

    r5436 r5440  
    1717    bool moduleIDisSignature() const override { return true; }
    1818protected:
    19     void linkExternalMethods() override;
    20     void generateInitializeMethod() override;
    21     void generateDoSegmentMethod() override;
    22     void generateFinalizeMethod() override;
     19    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     20    void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     21    void generateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     22    void generateFinalizeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2323protected:
    2424    const unsigned          mSegmentBlocks;
     
    3333    bool moduleIDisSignature() const override { return true; }
    3434protected:
    35     void generateInitializeMethod() override;
    36     void generateDoSegmentMethod() override;
    37     void generateFinalizeMethod() override;
     35    void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     36    void generateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     37    void generateFinalizeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    3838private:
    3939    unsigned mSegmentBlocks;
     
    4646    bool moduleIDisSignature() const override { return true; }
    4747protected:
    48     void generateInitializeMethod() override;
    49     void generateDoSegmentMethod() override;
     48    void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     49    void generateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    5050private:
    5151    unsigned mSegmentBlocks;
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5439 r5440  
    1919// However, if the segment spans two memory areas (e.g., because of wraparound),
    2020// then two write calls are made.
    21 void StdOutKernel::generateDoSegmentMethod() {
     21void StdOutKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    2222    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    2323
    2424    Constant * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth() - 1);
    2525    Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth / 8);
    26     Value * processed = getProcessedItemCount("codeUnitBuffer");
     26    Value * processed = iBuilder->getProcessedItemCount("codeUnitBuffer");
    2727    Value * itemsToDo = iBuilder->CreateSub(mAvailableItemCount[0], processed);
    2828    // There may be two memory areas if we are at the physical end of a circular buffer.
     
    3030    Value * wraparound = nullptr;
    3131    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
    32 
    33 
    34 
    35         Value * accessible = b->getLinearlyAccessibleItems(iBuilder, processed);
     32        Value * accessible = iBuilder->getLinearlyAccessibleItems("codeUnitBuffer", processed);
    3633        wraparound = iBuilder->CreateICmpULT(accessible, itemsToDo);
    3734        itemsToDo = iBuilder->CreateSelect(wraparound, accessible, itemsToDo);
     
    3936   
    4037    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateAnd(processed, blockItems), itemBytes);
    41     Value * bytePtr = iBuilder->CreatePointerCast(getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
     38    Value * bytePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
    4239    bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
    4340
     
    4542
    4643    processed = iBuilder->CreateAdd(processed, itemsToDo);
    47     setProcessedItemCount("codeUnitBuffer", processed);
     44    iBuilder->setProcessedItemCount("codeUnitBuffer", processed);
    4845   
    4946    // Now we may process the second area (if required).
    5047    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
    51         BasicBlock * wrapAroundWrite = CreateBasicBlock("wrapAroundWrite");
    52         BasicBlock * stdoutExit = CreateBasicBlock("stdoutExit");
     48        BasicBlock * wrapAroundWrite = iBuilder->CreateBasicBlock("wrapAroundWrite");
     49        BasicBlock * stdoutExit = iBuilder->CreateBasicBlock("stdoutExit");
    5350        iBuilder->CreateCondBr(wraparound, wrapAroundWrite, stdoutExit);
    5451        iBuilder->SetInsertPoint(wrapAroundWrite);
     
    5653        // Calculate from the updated value of processed;
    5754        byteOffset = iBuilder->CreateMul(iBuilder->CreateAnd(processed, blockItems), itemBytes);
    58         Value * bytePtr = iBuilder->CreatePointerCast(getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
     55        Value * bytePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
    5956        bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
    6057
     
    6259        iBuilder->CreateWriteCall(iBuilder->getInt32(1), bytePtr, iBuilder->CreateMul(itemsToDo, itemBytes));
    6360        processed = iBuilder->CreateAdd(processed, itemsToDo);
    64         setProcessedItemCount("codeUnitBuffer", mAvailableItemCount[0]);
     61        iBuilder->setProcessedItemCount("codeUnitBuffer", mAvailableItemCount[0]);
    6562        iBuilder->CreateBr(stdoutExit);
    6663        iBuilder->SetInsertPoint(stdoutExit);
     
    7471}
    7572
    76 void FileSink::generateInitializeMethod() {
    77     BasicBlock * setTerminationOnFailure = CreateBasicBlock("setTerminationOnFailure");
    78     BasicBlock * fileSinkInitExit = CreateBasicBlock("fileSinkInitExit");
    79     Value * fileName = getScalarField("fileName");
     73void FileSink::generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     74    BasicBlock * setTerminationOnFailure = iBuilder->CreateBasicBlock("setTerminationOnFailure");
     75    BasicBlock * fileSinkInitExit = iBuilder->CreateBasicBlock("fileSinkInitExit");
     76    Value * fileName = iBuilder->getScalarField("fileName");
    8077    Value * fileNameLength = iBuilder->CreateStrlenCall(fileName);
    8178    // Make a temporary file name template with the characters "XXXXXX" appended
     
    8380    Constant * suffixPlusNullLength = iBuilder->getSize(7);
    8481    Value * tmpFileNamePtr = iBuilder->CreatePointerCast(iBuilder->CreateMalloc(iBuilder->CreateAdd(fileNameLength, suffixPlusNullLength)), iBuilder->getInt8PtrTy());
    85     setScalarField("tmpFileName", tmpFileNamePtr);
     82    iBuilder->setScalarField("tmpFileName", tmpFileNamePtr);
    8683    iBuilder->CreateMemCpy(tmpFileNamePtr, fileName, fileNameLength, 1);
    8784#ifdef BACKUP_OLDFILE
    88     iBuilder->CreateMemCpy(iBuilder->CreateGEP(tmpFileNamePtr, fileNameLength), iBuilder->CreateGlobalStringPtr(".saved"), suffixPlusNullLength, 1);
     85    iBuilder->CreateMemCpy(iBuilder->CreateGEP(tmpFileNamePtr, fileNameLength), iBuilder->GetString(".saved"), suffixPlusNullLength, 1);
    8986    iBuilder->CreateRenameCall(fileName, tmpFileNamePtr);
    9087#else
    9188    iBuilder->CreateUnlinkCall(fileName);
    9289#endif
    93     iBuilder->CreateMemCpy(iBuilder->CreateGEP(tmpFileNamePtr, fileNameLength), iBuilder->CreateGlobalStringPtr("XXXXXX"), suffixPlusNullLength, 1);
     90    iBuilder->CreateMemCpy(iBuilder->CreateGEP(tmpFileNamePtr, fileNameLength), iBuilder->GetString("XXXXXX"), suffixPlusNullLength, 1);
    9491    Value * fileDes = iBuilder->CreateMkstempCall(tmpFileNamePtr);
    95     setScalarField("fileDes", fileDes);
     92    iBuilder->setScalarField("fileDes", fileDes);
    9693    Value * failure = iBuilder->CreateICmpEQ(fileDes, iBuilder->getInt32(-1));
    9794    iBuilder->CreateCondBr(failure, setTerminationOnFailure, fileSinkInitExit);
    9895    iBuilder->SetInsertPoint(setTerminationOnFailure);
    99     setTerminationSignal();
     96    iBuilder->setTerminationSignal();
    10097    iBuilder->CreateBr(fileSinkInitExit);
    10198    iBuilder->SetInsertPoint(fileSinkInitExit);
    10299}
    103100
    104 void FileSink::generateDoSegmentMethod() {
     101void FileSink::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
    105102
    106103    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    107104
    108     BasicBlock * closeFile = CreateBasicBlock("closeFile");
    109     BasicBlock * fileOutExit = CreateBasicBlock("fileOutExit");
     105    BasicBlock * closeFile = iBuilder->CreateBasicBlock("closeFile");
     106    BasicBlock * fileOutExit = iBuilder->CreateBasicBlock("fileOutExit");
    110107    Constant * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth());
    111108    Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth/8);
    112109
    113     Value * fileDes = getScalarField("fileDes");
    114     Value * available = getAvailableItemCount("codeUnitBuffer");
    115     Value * processed = getProcessedItemCount("codeUnitBuffer");
     110    Value * fileDes = iBuilder->getScalarField("fileDes");
     111    Value * available = iBuilder->getAvailableItemCount("codeUnitBuffer");
     112    Value * processed = iBuilder->getProcessedItemCount("codeUnitBuffer");
    116113    Value * itemsToDo = iBuilder->CreateSub(available, processed);
    117114    // There may be two memory areas if we are at the physical end of a circular buffer.
     
    119116    Value * wraparound = nullptr;
    120117    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
    121         Value * accessible = b->getLinearlyAccessibleItems(iBuilder, processed);
     118        Value * accessible = iBuilder->getLinearlyAccessibleItems("codeUnitBuffer", processed);
    122119        wraparound = iBuilder->CreateICmpULT(accessible, itemsToDo);
    123120        itemsToDo = iBuilder->CreateSelect(wraparound, accessible, itemsToDo);
     
    125122   
    126123    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    127     Value * bytePtr = iBuilder->CreatePointerCast(getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
     124    Value * bytePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
    128125    bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
    129126    iBuilder->CreateWriteCall(fileDes, bytePtr, iBuilder->CreateMul(itemsToDo, itemBytes));
    130127   
    131128    processed = iBuilder->CreateAdd(processed, itemsToDo);
    132     setProcessedItemCount("codeUnitBuffer", processed);
     129    iBuilder->setProcessedItemCount("codeUnitBuffer", processed);
    133130   
    134131    // Now we may process the second area (if required).
    135132    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
    136         BasicBlock * wrapAroundWrite = CreateBasicBlock("wrapAroundWrite");
    137         BasicBlock * checkFinal = CreateBasicBlock("checkFinal");
     133        BasicBlock * wrapAroundWrite = iBuilder->CreateBasicBlock("wrapAroundWrite");
     134        BasicBlock * checkFinal = iBuilder->CreateBasicBlock("checkFinal");
    138135        iBuilder->CreateCondBr(wraparound, wrapAroundWrite, checkFinal);
    139136        iBuilder->SetInsertPoint(wrapAroundWrite);
     
    141138        // Calculate from the updated value of processed;
    142139        byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    143         Value * bytePtr = iBuilder->CreatePointerCast(getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
     140        Value * bytePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
    144141        bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
    145142        itemsToDo = iBuilder->CreateSub(available, processed);
    146143        iBuilder->CreateWriteCall(fileDes, bytePtr, iBuilder->CreateMul(itemsToDo, itemBytes));
    147144        processed = iBuilder->CreateAdd(processed, itemsToDo);
    148         setProcessedItemCount("codeUnitBuffer", available);
     145        iBuilder->setProcessedItemCount("codeUnitBuffer", available);
    149146        iBuilder->CreateBr(checkFinal);
    150147        iBuilder->SetInsertPoint(checkFinal);
     
    154151    iBuilder->SetInsertPoint(closeFile);
    155152    iBuilder->CreateCloseCall(fileDes);
    156     Value * newFileNamePtr = getScalarField("fileName");
    157     Value * tmpFileNamePtr = getScalarField("tmpFileName");
     153    Value * newFileNamePtr = iBuilder->getScalarField("fileName");
     154    Value * tmpFileNamePtr = iBuilder->getScalarField("tmpFileName");
    158155    iBuilder->CreateRenameCall(tmpFileNamePtr, newFileNamePtr);
    159156    iBuilder->CreateFree(tmpFileNamePtr);
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.h

    r5436 r5440  
    1212namespace kernel {
    1313
    14 class StdOutKernel : public SegmentOrientedKernel {
     14class StdOutKernel final : public SegmentOrientedKernel {
    1515public:
    1616    StdOutKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned codeUnitWidth);
    1717private:
    18     void generateDoSegmentMethod() override final;
     18    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
    1919private:
    2020    const unsigned mCodeUnitWidth;
     
    2323
    2424
    25 class FileSink : public SegmentOrientedKernel {
     25class FileSink final : public SegmentOrientedKernel {
    2626public: 
    2727    FileSink(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned codeUnitWidth);
    2828protected:
    29     void generateInitializeMethod() override final;
    30     void generateDoSegmentMethod() override final;
     29    void generateInitializeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     30    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
    3131private:
    3232    const unsigned mCodeUnitWidth;
  • icGREP/icgrep-devel/icgrep/kernels/streams_merge.cpp

    r5436 r5440  
    2121}
    2222
    23 void StreamsMerge::generateDoBlockMethod() {
     23void StreamsMerge::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
    2424
    2525    std::vector<Value *> resultStreams;
    2626
    2727    for (unsigned j = 0; j < mStreamsPerSet; j++) {
    28         resultStreams.push_back(loadInputStreamBlock("inputGroup" + std::to_string(0), iBuilder->getInt32(j)));
     28        resultStreams.push_back(iBuilder->loadInputStreamBlock("inputGroup" + std::to_string(0), iBuilder->getInt32(j)));
    2929    }
    3030
    3131    for (unsigned i = 1; i < mInputSets; i++) {
    3232        for (unsigned j = 0; j < mStreamsPerSet; j++) {
    33             resultStreams[j] = iBuilder->CreateOr(resultStreams[j], loadInputStreamBlock("inputGroup" + std::to_string(i), iBuilder->getInt32(j)));
     33            resultStreams[j] = iBuilder->CreateOr(resultStreams[j], iBuilder->loadInputStreamBlock("inputGroup" + std::to_string(i), iBuilder->getInt32(j)));
    3434        }
    3535    }
    3636    for (unsigned j = 0; j < mStreamsPerSet; j++) {
    37