Changeset 5297


Ignore:
Timestamp:
Feb 3, 2017, 1:25:53 PM (8 months ago)
Author:
nmedfort
Message:

Partial removal of BlockNo

Location:
icGREP/icgrep-devel/icgrep
Files:
35 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/types/streamtype.cpp

    r5260 r5297  
    55namespace IDISA {
    66
    7 llvm::Type * StreamType::resolveType(IDISA_Builder * const builder) {
     7llvm::Type * StreamType::resolveType(IDISA_Builder * const builder) const {
    88    if (mFieldWidth == 1) return builder->getBitBlockType();
    99    return llvm::ArrayType::get(builder->getBitBlockType(), mFieldWidth);
  • icGREP/icgrep-devel/icgrep/IR_Gen/types/streamtype.h

    r5260 r5297  
    2121    }
    2222
    23     llvm::Type * resolveType(IDISA_Builder * const iBuilder);
     23    llvm::Type * resolveType(IDISA_Builder * const iBuilder) const;
    2424
    2525    /// Methods for support type inquiry through isa, cast, and dyn_cast.
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.cpp

    r5292 r5297  
    2323}
    2424
    25 void editdCPUKernel::generateDoBlockMethod(Value * blockNo) {
     25void editdCPUKernel::generateDoBlockMethod() {
    2626    auto savePoint = iBuilder->saveIP();
    2727
     
    4141    Value * pattCh = iBuilder->CreateLoad(pattPtr);
    4242    Value * pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    43     Value * pattStreamPtr = getStream("CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
     43    Value * pattStreamPtr = getInputStream("CCStream", iBuilder->CreateZExt(pattIdx, int32ty));
    4444    Value * pattStream = iBuilder->CreateLoad(pattStreamPtr);
    4545    pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     
    5454        pattCh = iBuilder->CreateLoad(pattPtr);
    5555        pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    56         pattStreamPtr = getStream("CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
     56        pattStreamPtr = getInputStream("CCStream", iBuilder->CreateZExt(pattIdx, int32ty));
    5757        pattStream = iBuilder->CreateLoad(pattStreamPtr);
    5858
     
    7272    }
    7373   
    74     Value * ptr = getStream("ResultStream", blockNo, iBuilder->getInt32(0));
     74    Value * ptr = getOutputStream("ResultStream", iBuilder->getInt32(0));
    7575    iBuilder->CreateStore(e[mPatternLen - 1][0], ptr);
    7676    for(unsigned j = 1; j<= mEditDistance; j++){
    77         ptr = getStream("ResultStream", blockNo, iBuilder->getInt32(j));
     77        ptr = getOutputStream("ResultStream", iBuilder->getInt32(j));
    7878        iBuilder->CreateStore(iBuilder->CreateAnd(e[mPatternLen-1][j], iBuilder->CreateNot(e[mPatternLen-1][j-1])), ptr);
    7979    }
     
    8383}
    8484
    85 void editdCPUKernel::generateFinalBlockMethod(Value * remainingBytes, Value * blockNo) {
     85void editdCPUKernel::generateFinalBlockMethod(Value * remainingBytes) {
    8686    setScalarField("EOFmask", iBuilder->bitblock_mask_from(remainingBytes));
    8787    CreateDoBlockMethodCall();
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.h

    r5292 r5297  
    2121   
    2222private:
    23     void generateDoBlockMethod(llvm::Value * blockNo) override;
    24     void generateFinalBlockMethod(llvm::Value * remainingBytes, llvm::Value * blockNo) override;
     23    void generateDoBlockMethod() override;
     24    void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
    2525    void bitblock_advance_ci_co(llvm::Value * val, unsigned shift, llvm::Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<llvm::Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j) const;
    2626    unsigned mEditDistance;
  • icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.cpp

    r5292 r5297  
    2222}
    2323
    24 void editdGPUKernel::generateDoBlockMethod(Value * blockNo) {
     24void editdGPUKernel::generateDoBlockMethod() {
    2525
    2626    IntegerType * const int32ty = iBuilder->getInt32Ty();
     
    4444    Value * pattCh = iBuilder->CreateLoad(pattPtr);
    4545    Value * pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    46     Value * pattStreamPtr = getStream("CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
     46    Value * pattStreamPtr = getInputStream("CCStream", iBuilder->CreateZExt(pattIdx, int32ty));
    4747    Value * pattStream = iBuilder->CreateLoad(pattStreamPtr);
    4848    pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     
    5656        pattCh = iBuilder->CreateLoad(pattPtr);
    5757        pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    58         pattStreamPtr = getStream("CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
     58        pattStreamPtr = getInputStream("CCStream", iBuilder->CreateZExt(pattIdx, int32ty));
    5959        pattStream = iBuilder->CreateLoad(pattStreamPtr);
    6060        bitblock_advance_ci_co(iBuilder, e[i-1][0], 1, stideCarryArr, carryIdx++, adv, calculated, i-1, 0);
     
    7171        pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
    7272    }
    73     Value * ptr = getStream("ResultStream", blockNo, iBuilder->getInt32(0));
     73    Value * ptr = getOutputStream("ResultStream", iBuilder->getInt32(0));
    7474    iBuilder->CreateStore(e[mPatternLen-1][0], ptr);
    7575    for(unsigned j = 1; j<= mEditDistance; j++){
    76         ptr = getStream("ResultStream", blockNo, iBuilder->getInt32(j));
     76        ptr = getOutputStream("ResultStream", iBuilder->getInt32(j));
    7777        iBuilder->CreateStore(iBuilder->CreateAnd(e[mPatternLen - 1][j], iBuilder->CreateNot(e[mPatternLen - 1][j - 1])), ptr);
    7878    }
    7979}
    8080
    81 void editdGPUKernel::generateFinalBlockMethod(Value * remainingBytes, Value * blockNo) {
     81void editdGPUKernel::generateFinalBlockMethod(Value * remainingBytes) {
    8282    setScalarField("EOFmask", iBuilder->bitblock_mask_from(remainingBytes));
    8383    CreateDoBlockMethodCall();
  • icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.h

    r5292 r5297  
    2121   
    2222private:
    23     void generateDoBlockMethod(llvm::Value * blockNo) override;
    24     void generateFinalBlockMethod(llvm::Value * remainingBytes, llvm::Value * blockNo) override;
     23    void generateDoBlockMethod() override;
     24    void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
    2525    unsigned mEditDistance;
    2626    unsigned mPatternLen;
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.cpp

    r5292 r5297  
    1818}
    1919
    20 void editdScanKernel::generateDoBlockMethod(Value * blockNo) {
     20void editdScanKernel::generateDoBlockMethod() {
    2121    auto savePoint = iBuilder->saveIP();
    2222    Function * scanWordFunction = generateScanWordRoutine(iBuilder->getModule());
     
    2626    Type * T = iBuilder->getIntNTy(mScanwordBitWidth);
    2727    VectorType * scanwordVectorType =  VectorType::get(T, fieldCount);
     28    Value * blockNo = getBlockNo();
    2829    Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
    2930   
    3031    std::vector<Value * > matchWordVectors;
    3132    for(unsigned d = 0; d <= mEditDistance; d++) {
    32         Value * ptr = getStream("matchResults", blockNo, iBuilder->getInt32(d));
     33        Value * ptr = getInputStream("matchResults", iBuilder->getInt32(d));
    3334        Value * matches = iBuilder->CreateBlockAlignedLoad(ptr);
    3435        matchWordVectors.push_back(iBuilder->CreateBitCast(matches, scanwordVectorType));
     
    4142        }
    4243        scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
    43 
    4444    }
    4545}
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.h

    r5292 r5297  
    1818       
    1919private:
    20     void generateDoBlockMethod(llvm::Value * blockNo) override;
     20    void generateDoBlockMethod() override;
    2121    llvm::Function * generateScanWordRoutine(llvm::Module * m) const;
    2222       
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5285 r5297  
    223223utf8_encoder.h
    224224wc.cpp
     225CMakeLists.txt
     226kernels/alignedprint.h
     227kernels/alignedprint.cpp
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp

    r5296 r5297  
    3838}
    3939
    40 void PrintableBits::generateDoBlockMethod(Value * blockNo) {   
    41     Value * strmPtr = getStream("bitStream", blockNo, iBuilder->getInt32(0));
     40void PrintableBits::generateDoBlockMethod() {
     41    Value * strmPtr = getStream("bitStream", iBuilder->getInt32(0));
    4242   
    4343    Value * bitStrmVal = iBuilder->CreateBlockAlignedLoad(strmPtr);
     
    5757   
    5858    for (unsigned j = 0; j < 8; ++j) {
    59         Value * ptr = getStream("byteStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j));
     59        Value * ptr = getStream("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j));
    6060        iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(printableBytes[j]), ptr);
    6161    }
     
    7878}
    7979
    80 void SelectStream::generateDoBlockMethod(Value * blockNo) {   
     80void SelectStream::generateDoBlockMethod() {
    8181    if (mStreamIndex >= mSizeInputStreamSet)
    8282        llvm::report_fatal_error("Stream index out of bounds.\n");
    8383   
    84     Value * strmPtr = getStream("bitStreams", blockNo, iBuilder->getInt32(mStreamIndex));
     84    Value * strmPtr = getStream("bitStreams", iBuilder->getInt32(mStreamIndex));
    8585    Value * bitStrmVal = iBuilder->CreateBlockAlignedLoad(strmPtr);
    8686
    87     Value * ptr = getStream("bitStream", blockNo, iBuilder->getInt32(0));
     87    Value * ptr = getStream("bitStream", iBuilder->getInt32(0));
    8888    iBuilder->CreateBlockAlignedStore(bitStrmVal, ptr);
    8989}
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.h

    r5296 r5297  
    1717    virtual ~PrintableBits() {}
    1818private:
    19     void generateDoBlockMethod(llvm::Value * blockNo) override;
     19    void generateDoBlockMethod() override;
    2020};
    2121
     
    2525    virtual ~SelectStream() {}
    2626private:
    27     void generateDoBlockMethod(llvm::Value * blockNo) override;
     27    void generateDoBlockMethod() override;
    2828    unsigned mSizeInputStreamSet;
    2929    unsigned mStreamIndex;
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.cpp

    r5292 r5297  
    1616using namespace llvm;
    1717
    18 void DirectCharacterClassKernelBuilder::generateDoBlockMethod(Value *blockNo) {
     18void DirectCharacterClassKernelBuilder::generateDoBlockMethod() {
    1919    unsigned packCount = 8 * mCodeUnitSize; 
    2020    unsigned codeUnitWidth = 8 * mCodeUnitSize;
    2121    Value * codeUnitPack[packCount];
    2222    for (unsigned i = 0; i < packCount; i++) {
    23         Value * ptr = getStream("codeUnitStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     23        Value * ptr = getInputStream("codeUnitStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    2424        codeUnitPack[i] = iBuilder->CreateBlockAlignedLoad(ptr);
    2525    }
     
    5656            theCCstream = iBuilder->simd_or(theCCstream, pack);
    5757        }
    58         Value * ptr = getStream("ccStream", blockNo, iBuilder->getInt32(j));
     58        Value * ptr = getOutputStream("ccStream", iBuilder->getInt32(j));
    5959        iBuilder->CreateBlockAlignedStore(theCCstream, ptr);
    6060    }
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.h

    r5292 r5297  
    2727    }
    2828   
    29     void generateDoBlockMethod(llvm::Value * blockNo) override;
     29    void generateDoBlockMethod() override;
    3030
    3131private:
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5292 r5297  
    88#include <llvm/IR/Value.h>
    99#include <llvm/IR/Module.h>
     10#include <llvm/Support/raw_ostream.h>
    1011
    1112using namespace llvm;
     
    5354// Outputs: the deleted streams, plus a partial sum popcount
    5455
    55 void DeletionKernel::generateDoBlockMethod(Value * blockNo) {
    56     Value * delMaskPtr = getStream("delMaskSet", blockNo, iBuilder->getInt32(0));
     56void DeletionKernel::generateDoBlockMethod() {
     57    Value * delMaskPtr = getInputStream("delMaskSet", iBuilder->getInt32(0));
    5758    Value * delMask = iBuilder->CreateBlockAlignedLoad(delMaskPtr);
    58     std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
     59    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    5960    for (unsigned j = 0; j < mStreamCount; ++j) {
    60         Value * inputStreamPtr = getStream("inputStreamSet", blockNo, iBuilder->getInt32(j));
     61        Value * inputStreamPtr = getInputStream("inputStreamSet", iBuilder->getInt32(j));
    6162        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamPtr);
    6263        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    63         Value * outputStreamPtr = getStream("outputStreamSet", blockNo, iBuilder->getInt32(j));
     64        Value * outputStreamPtr = getOutputStream("outputStreamSet", iBuilder->getInt32(j));
    6465        iBuilder->CreateBlockAlignedStore(output, outputStreamPtr);
    6566    }
    6667    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
    67     Value * delCountPtr = getStream("deletionCounts", blockNo, iBuilder->getInt32(0));
     68    Value * delCountPtr = getOutputStream("deletionCounts", iBuilder->getInt32(0));
    6869    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(delCount), delCountPtr);
    6970    // Stream deletion has only been applied within fields; the actual number of data items has not yet changed.
     
    7475}
    7576
    76 void DeletionKernel::generateFinalBlockMethod(Value * remainingBytes, Value * blockNo) {
     77void DeletionKernel::generateFinalBlockMethod(Value * remainingBytes) {
    7778    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    7879    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
    7980    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
    80     Value * const delmaskPtr = getStream("delMaskSet", blockNo, iBuilder->getInt32(0));
     81    Value * const delmaskPtr = getInputStream("delMaskSet", iBuilder->getInt32(0));
    8182    Value * const delmaskVal = iBuilder->CreateBlockAlignedLoad(delmaskPtr);
    8283    iBuilder->CreateBlockAlignedStore(iBuilder->CreateOr(EOF_del, delmaskVal), delmaskPtr);
  • icGREP/icgrep-devel/icgrep/kernels/deletion.h

    r5292 r5297  
    2929protected:
    3030
    31     void generateDoBlockMethod(llvm::Value * blockNo) override;
     31    void generateDoBlockMethod() override;
    3232
    33     void generateFinalBlockMethod(llvm::Value * remainingBytes, llvm::Value * blockNo) override;
     33    void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
    3434
    3535private:
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.cpp

    r5292 r5297  
    1111namespace kernel {
    1212
    13 void EvenOddKernel::generateDoBlockMethod(Value * blockNo) {
     13void EvenOddKernel::generateDoBlockMethod() {
    1414    Value * even = iBuilder->simd_fill(64, iBuilder->getInt64(0x5555555555555555));
    1515    Value * odd = iBuilder->bitCast(iBuilder->simd_fill(8, iBuilder->getInt8(0xAA)));
    16     Value * evenBitsPtr = getStream("even_odd", blockNo, iBuilder->getInt32(0));
     16    Value * evenBitsPtr = getOutputStream("even_odd", iBuilder->getInt32(0));
    1717    iBuilder->CreateBlockAlignedStore(even, evenBitsPtr);
    18     Value * oddBitsPtr = getStream("even_odd", blockNo, iBuilder->getInt32(1));
     18    Value * oddBitsPtr = getOutputStream("even_odd", iBuilder->getInt32(1));
    1919    iBuilder->CreateBlockAlignedStore(odd, oddBitsPtr);
    2020}
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.h

    r5292 r5297  
    1717    virtual ~EvenOddKernel() {}
    1818private:
    19     void generateDoBlockMethod(llvm::Value * blockNo) override;
     19    void generateDoBlockMethod() override;
    2020
    2121};
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5292 r5297  
    2727    iBuilder->setModule(client);
    2828    if (mKernelStateType == nullptr) {
    29         throw std::runtime_error("Kernel interface " + mKernelName + " not yet finalized.");
     29        throw std::runtime_error("Kernel interface " + getName() + " not yet finalized.");
    3030    }
    3131    PointerType * selfType = PointerType::getUnqual(mKernelStateType);
     
    3737    }
    3838    FunctionType * initType = FunctionType::get(iBuilder->getVoidTy(), initParameters, false);
    39     Function * init = Function::Create(initType, GlobalValue::ExternalLinkage, mKernelName + INIT_SUFFIX, client);
     39    Function * init = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, client);
    4040    init->setCallingConv(CallingConv::C);
    4141    init->setDoesNotThrow();
    4242    auto args = init->arg_begin();
    43     args++->setName("self");
     43    args->setName("self");
    4444    for (auto binding : mScalarInputs) {
    45         args++->setName(binding.name);
     45        (++args)->setName(binding.name);
    4646    }
    47     assert (args == init->arg_end());
     47    assert ((++args) == init->arg_end());
    4848
    4949    // Create the doSegment function prototype.
     
    5353    }
    5454    FunctionType * doSegmentType = FunctionType::get(iBuilder->getVoidTy(), doSegmentParameters, false);
    55     Function * doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, mKernelName + DO_SEGMENT_SUFFIX, client);
     55    Function * doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, client);
    5656    doSegment->setCallingConv(CallingConv::C);
    5757    doSegment->setDoesNotThrow();
    5858    doSegment->setDoesNotCapture(1); // for self parameter only.
    5959    args = doSegment->arg_begin();
    60     args++->setName("self");
    61     args++->setName("doFinal");
     60    args->setName("self");
     61    (++args)->setName("doFinal");
    6262    for (auto ss : mStreamSetInputs) {
    63         args++->setName(ss.name + "_availableItems");
     63        (++args)->setName(ss.name + "_availableItems");
    6464    }
    65     assert (args == doSegment->arg_end());
     65    assert ((++args) == doSegment->arg_end());
    6666
    6767    // Add any additional kernel declarations
     
    7171    for (const auto & binding : mScalarOutputs) {
    7272        FunctionType * accumFnType = FunctionType::get(binding.type, {selfType}, false);
    73         Function * accumFn = Function::Create(accumFnType, GlobalValue::ExternalLinkage, mKernelName + ACCUMULATOR_INFIX + binding.name, client);
     73        Function * accumFn = Function::Create(accumFnType, GlobalValue::ExternalLinkage, getName() + ACCUMULATOR_INFIX + binding.name, client);
    7474        accumFn->setCallingConv(CallingConv::C);
    7575        accumFn->setDoesNotThrow();
    76         auto self = accumFn->arg_begin();
    77         self->setName("self");
     76        accumFn->setDoesNotCapture(1);
     77        auto args = accumFn->arg_begin();
     78        args->setName("self");
     79        assert ((++args) == accumFn->arg_end());
    7880    }
    7981
     
    8789
    8890llvm::Function * KernelInterface::getAccumulatorFunction(const std::string & accumName) const {
    89     const auto name = mKernelName + ACCUMULATOR_INFIX + accumName;
     91    const auto name = getName() + ACCUMULATOR_INFIX + accumName;
    9092    Function * f = iBuilder->getModule()->getFunction(name);
    9193    if (LLVM_UNLIKELY(f == nullptr)) {
     
    9698
    9799Function * KernelInterface::getInitFunction() const {
    98     const auto name = mKernelName + INIT_SUFFIX;
     100    const auto name = getName() + INIT_SUFFIX;
    99101    Function * f = iBuilder->getModule()->getFunction(name);
    100102    if (LLVM_UNLIKELY(f == nullptr)) {
     
    105107
    106108Function * KernelInterface::getDoSegmentFunction() const {
    107     const auto name = mKernelName + DO_SEGMENT_SUFFIX;
     109    const auto name = getName() + DO_SEGMENT_SUFFIX;
    108110    Function * f = iBuilder->getModule()->getFunction(name);
    109111    if (LLVM_UNLIKELY(f == nullptr)) {
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5292 r5297  
    3636     */
    3737   
    38     const std::string & getName() const { return mKernelName;}
     38    const std::string & getName() const { return mKernelName; }
    3939       
    40     const std::vector<Binding> & getStreamInputs() const {return mStreamSetInputs;}
     40    const std::vector<Binding> & getStreamInputs() const { return mStreamSetInputs; }
    4141
    42     const std::vector<Binding> & getStreamOutputs() const {return mStreamSetOutputs;}
     42    const std::vector<Binding> & getStreamOutputs() const { return mStreamSetOutputs; }
    4343
    44     const std::vector<Binding> & getScalarInputs() const { return mScalarInputs;}
     44    const std::vector<Binding> & getScalarInputs() const { return mScalarInputs; }
    4545
    46     const std::vector<Binding> & getScalarOutputs() const { return mScalarOutputs;}
     46    const std::vector<Binding> & getScalarOutputs() const { return mScalarOutputs; }
    4747       
    4848    // Add ExternalLinkage method declarations for the kernel to a given client module.
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5292 r5297  
    55
    66#include "kernel.h"
    7 #include <llvm/IR/Value.h>               // for Value
    8 #include <llvm/Support/ErrorHandling.h>  // for report_fatal_error
    9 #include <toolchain.h>                   // for BufferSegments, SegmentSize
    10 #include <kernels/streamset.h>           // for StreamSetBuffer
    11 #include <llvm/ADT/StringRef.h>          // for StringRef, operator==
    12 #include <llvm/IR/CallingConv.h>         // for ::C
    13 #include <llvm/IR/Constant.h>            // for Constant
    14 #include <llvm/IR/Constants.h>           // for ConstantInt
    15 #include <llvm/IR/Function.h>            // for Function, Function::arg_iter...
    16 #include <llvm/IR/Instructions.h>        // for LoadInst (ptr only), PHINode
     7#include <toolchain.h>
     8#include <kernels/streamset.h>
     9#include <IR_Gen/types/streamtype.h>
     10#include <llvm/IR/Constants.h>
     11#include <llvm/IR/Function.h>
     12#include <llvm/IR/Instructions.h>
    1713#include <llvm/IR/Module.h>
    18 #include <llvm/Support/Compiler.h>       // for LLVM_UNLIKELY
    1914#include <llvm/Support/raw_ostream.h>
    20 namespace llvm { class BasicBlock; }
    21 namespace llvm { class Type; }
    2215
    2316static const auto BLOCK_NO_SCALAR = "blockNo";
     
    4235using namespace kernel;
    4336using namespace parabix;
     37
     38using StreamType = IDISA::StreamType;
    4439
    4540unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
     
    8479        throw std::runtime_error(out.str());
    8580    }
    86     int streamSetNo = 0;
    8781    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    8882        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
    8983             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
    9084        }
    91         mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamBufferPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX});
    92         mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
     85        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX});
     86        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
    9387        addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
    94         streamSetNo++;
    9588    }
    9689    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    97         mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamBufferPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX});
    98         mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
     90        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX});
     91        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, mStreamSetInputs.size() + i);
    9992        addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
    100         streamSetNo++;
    10193    }
    10294    for (auto binding : mScalarInputs) {
     
    112104    addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
    113105    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
    114     mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
     106    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
    115107}
    116108
     
    118110    auto saveModule = iBuilder->getModule();
    119111    auto savePoint = iBuilder->saveIP();
    120     auto module = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
     112    auto module = make_unique<Module>(getName() + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
    121113    iBuilder->setModule(module.get());
    122114    generateKernel(inputs, outputs);
     
    150142void KernelBuilder::callGenerateDoSegmentMethod() {
    151143    mCurrentFunction = getDoSegmentFunction();
    152     iBuilder->SetInsertPoint(CreateBasicBlock(mKernelName + "_entry"));
     144    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
    153145    auto args = mCurrentFunction->arg_begin();
    154146    mSelf = &*(args++);
     
    247239
    248240Value * KernelBuilder::getBlockNo() const {
    249     Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(BLOCK_NO_SCALAR)});
    250     return iBuilder->CreateLoad(ptr);
     241    return getScalarField(mSelf, BLOCK_NO_SCALAR);
    251242}
    252243
    253244void KernelBuilder::setBlockNo(Value * value) const {
    254     Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(BLOCK_NO_SCALAR)});
    255     iBuilder->CreateStore(value, ptr);
     245    setScalarField(mSelf, BLOCK_NO_SCALAR, value);
     246}
     247
     248llvm::Value * KernelBuilder::getInputStream(const std::string & name, llvm::Value * index) const {
     249    Value * ic = getProcessedItemCount(name);
     250    const StreamSetBuffer * buf = getStreamSetBuffer(name);
     251    ic = iBuilder->CreateUDiv(ic, iBuilder->getSize(iBuilder->getBitBlockWidth()));
     252    return buf->getStream(getStreamSetBufferPtr(name), ic, index);
     253}
     254
     255llvm::Value * KernelBuilder::getInputStream(const std::string & name, llvm::Value * index1, llvm::Value * index2) const {
     256    Value * ic = getProcessedItemCount(name);
     257    const StreamSetBuffer * buf = getStreamSetBuffer(name);
     258    ic = iBuilder->CreateUDiv(ic, iBuilder->getSize(iBuilder->getBitBlockWidth()));
     259    return buf->getStream(getStreamSetBufferPtr(name), ic, index1, index2);
     260}
     261
     262llvm::Value * KernelBuilder::getOutputStream(const std::string & name, llvm::Value * index) const {
     263    Value * ic = getProducedItemCount(name);
     264    const StreamSetBuffer * buf = getStreamSetBuffer(name);
     265    ic = iBuilder->CreateUDiv(ic, iBuilder->getSize(iBuilder->getBitBlockWidth()));
     266    return buf->getStream(getStreamSetBufferPtr(name), ic, index);
     267}
     268
     269llvm::Value * KernelBuilder::getOutputStream(const std::string & name, llvm::Value * index1, llvm::Value * index2) const {
     270    Value * ic = getProducedItemCount(name);
     271    const StreamSetBuffer * buf = getStreamSetBuffer(name);
     272    ic = iBuilder->CreateUDiv(ic, iBuilder->getSize(iBuilder->getBitBlockWidth()));
     273    return buf->getStream(getStreamSetBufferPtr(name), ic, index1, index2);
     274}
     275
     276Value * KernelBuilder::getStreamView(llvm::Type * type, const std::string & name, Value * blockNo, Value * index) const {
     277    return getStreamSetBuffer(name)->getStreamView(type, getStreamSetBufferPtr(name), blockNo, index);
    256278}
    257279
     
    273295}
    274296
    275 inline Value * KernelBuilder::getStreamSetBufferPtr(llvm::Value * instance, const std::string & name) const {
    276     return getScalarField(instance, name + BUFFER_PTR_SUFFIX);
    277 }
    278 
    279 inline Value * KernelBuilder::getStreamSetBufferPtr(llvm::Value * instance, llvm::Value * index) const {
    280     return getScalarField(instance, index);
     297inline Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
     298    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
     299}
     300
     301inline Value * KernelBuilder::getStreamSetBufferPtr(llvm::Value * index) const {
     302    return getScalarField(getSelf(), index);
    281303}
    282304
     
    302324}
    303325
    304 Value * KernelBuilder::getStream(const std::string & name, Value * blockNo, Value * index) const {
    305     return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(name), blockNo, index);
    306 }
    307 
    308 Value * KernelBuilder::getStream(const std::string & name, Value * blockNo, Value * index1, Value * index2) const {
    309     assert (index1->getType() == index2->getType());
    310     return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(name), blockNo, index1, index2);
    311 }
    312 
    313 Value * KernelBuilder::getStreamView(const std::string & name, Value * blockNo, Value * index) const {
    314     return getStreamSetBuffer(name)->getStreamView(getStreamSetBufferPtr(name), blockNo, index);
    315 }
    316 
    317 Value * KernelBuilder::getStreamView(llvm::Type * type, const std::string & name, Value * blockNo, Value * index) const {
    318     return getStreamSetBuffer(name)->getStreamView(type, getStreamSetBufferPtr(name), blockNo, index);
    319 }
    320 
    321326BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
    322327    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentFunction);
     
    343348
    344349//  The default finalBlock method simply dispatches to the doBlock routine.
    345 void BlockOrientedKernel::generateFinalBlockMethod(Value * remainingBytes, Value * blockNo) {
     350void BlockOrientedKernel::generateFinalBlockMethod(Value * remainingBytes) {
    346351//    std::vector<Value *> args = {self};
    347352//    for (Argument & arg : function->getArgumentList()){
     
    353358//  The default doSegment method dispatches to the doBlock routine for
    354359//  each block of the given number of blocksToDo, and then updates counts.
    355 void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> &producerPos) {
     360void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
    356361
    357362    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
    358     BasicBlock * const strideLoopCond = CreateBasicBlock(mKernelName + "_strideLoopCond");
    359     BasicBlock * const strideLoopBody = CreateBasicBlock(mKernelName + "_strideLoopBody");
    360     BasicBlock * const stridesDone = CreateBasicBlock(mKernelName + "_stridesDone");
    361     BasicBlock * const doFinalBlock = CreateBasicBlock(mKernelName + "_doFinalBlock");
    362     BasicBlock * const segmentDone = CreateBasicBlock(mKernelName + "_segmentDone");
     363    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
     364    BasicBlock * const strideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
     365    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
     366    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
     367    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
    363368
    364369    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
     
    370375        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
    371376    }
     377
    372378    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
    373379    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
     
    378384    PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
    379385    stridesRemaining->addIncoming(stridesToDo, entryBlock);
    380     Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, iBuilder->getSize(0));
     386    Value * notDone = iBuilder->CreateICmpNE(stridesRemaining, iBuilder->getSize(0));
    381387    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
    382388
     
    387393
    388394    setBlockNo(iBuilder->CreateAdd(blockNo, strideBlocks));
     395
     396    // Update counts
     397
     398    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     399        Value * processed = getProcessedItemCount(mStreamSetInputs[i].name);
     400        processed = iBuilder->CreateAdd(processed, stride);
     401        setProcessedItemCount(mStreamSetInputs[i].name, processed);
     402    }
     403
     404    if (!mDoBlockUpdatesProducedItemCountsAttribute) {
     405        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     406            Value * produced = getProducedItemCount(mStreamSetOutputs[i].name);
     407            produced = iBuilder->CreateAdd(produced, stride);
     408            setProducedItemCount(mStreamSetOutputs[i].name, produced);
     409        }
     410    }
     411
    389412    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), strideLoopBody);
    390413    iBuilder->CreateBr(strideLoopCond);
    391414
    392415    iBuilder->SetInsertPoint(stridesDone);
    393     // Update counts for the full strides processed.
    394     Value * segmentItemsProcessed = iBuilder->CreateMul(stridesToDo, stride);
    395     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    396         Value * preProcessed = getProcessedItemCount(mStreamSetInputs[i].name);
    397         setProcessedItemCount(mStreamSetInputs[i].name, iBuilder->CreateAdd(preProcessed, segmentItemsProcessed));
    398     }
    399     if (!mDoBlockUpdatesProducedItemCountsAttribute) {
    400         for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    401             Value * preProduced = getProducedItemCount(mStreamSetOutputs[i].name);
    402             setProducedItemCount(mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, segmentItemsProcessed));
    403         }
    404     }
    405416
    406417    // Now conditionally perform the final block processing depending on the doFinal parameter.
     
    441452    mSelf = &(*args);
    442453    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    443     generateDoBlockMethod(getBlockNo()); // must be implemented by the KernelBuilder subtype
     454    generateDoBlockMethod(); // must be implemented by the KernelBuilder subtype
    444455    iBuilder->CreateRetVoid();
    445456}
     
    452463    Value * const remainingBytes = &(*args);
    453464    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    454     generateFinalBlockMethod(remainingBytes, getBlockNo()); // possibly overridden by the KernelBuilder subtype
     465    generateFinalBlockMethod(remainingBytes); // possibly overridden by the KernelBuilder subtype
    455466    iBuilder->CreateRetVoid();
    456467}
    457468
    458469Function * BlockOrientedKernel::getDoBlockFunction() const {
    459     const auto name = mKernelName + DO_BLOCK_SUFFIX;
     470    const auto name = getName() + DO_BLOCK_SUFFIX;
    460471    Function * const f = iBuilder->getModule()->getFunction(name);
    461472    if (LLVM_UNLIKELY(f == nullptr)) {
     
    470481
    471482Function * BlockOrientedKernel::getDoFinalBlockFunction() const {
    472     const auto name = mKernelName + FINAL_BLOCK_SUFFIX;
     483    const auto name = getName() + FINAL_BLOCK_SUFFIX;
    473484    Function * const f = iBuilder->getModule()->getFunction(name);
    474485    if (LLVM_UNLIKELY(f == nullptr)) {
     
    485496    // Create the doBlock and finalBlock function prototypes
    486497    FunctionType * const doBlockType = FunctionType::get(iBuilder->getVoidTy(), {selfType}, false);
    487     Function * const doBlock = Function::Create(doBlockType, GlobalValue::ExternalLinkage, mKernelName + DO_BLOCK_SUFFIX, m);
     498    Function * const doBlock = Function::Create(doBlockType, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, m);
    488499    doBlock->setCallingConv(CallingConv::C);
    489500    doBlock->setDoesNotThrow();
     
    491502    auto args = doBlock->arg_begin();
    492503    args->setName("self");
     504    assert ((++args) == doBlock->arg_end());
    493505
    494506    FunctionType * const finalBlockType = FunctionType::get(iBuilder->getVoidTy(), {selfType, iBuilder->getSizeTy()}, false);
    495     Function * const finalBlock = Function::Create(finalBlockType, GlobalValue::ExternalLinkage, mKernelName + FINAL_BLOCK_SUFFIX, m);
     507    Function * const finalBlock = Function::Create(finalBlockType, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, m);
    496508    finalBlock->setCallingConv(CallingConv::C);
    497509    finalBlock->setDoesNotThrow();
    498510    finalBlock->setDoesNotCapture(1);
    499511    args = finalBlock->arg_begin();
    500     args++->setName("self");
    501     args->setName("remainingBytes");
     512    args->setName("self");
     513    (++args)->setName("remainingBytes");
     514    assert ((++args) == finalBlock->arg_end());
    502515}
    503516
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5292 r5297  
    5050    void setTerminationSignal(llvm::Value * instance) const override final;
    5151
    52     llvm::Value * getScalarFieldPtr(llvm::Value * instance, const std::string & name) const;
    53 
    54     llvm::Value * getScalarFieldPtr(llvm::Value *instance, llvm::Value * index) const;
    55 
    56     llvm::Value * getStreamSetBufferPtr(llvm::Value * instance, const std::string & name) const;
    57 
    58     llvm::Value * getStreamSetBufferPtr(llvm::Value * instance, llvm::Value * index) const;
    59    
    6052    // Get the value of a scalar field for a given instance.
    6153    llvm::Value * getScalarField(llvm::Value * instance, const std::string & fieldName) const;
     
    180172    }
    181173
    182     llvm::Value * getStream(const std::string & name, llvm::Value * blockNo, llvm::Value * index) const;
    183 
    184     llvm::Value * getStream(const std::string & name, llvm::Value * blockNo, llvm::Value * index1, llvm::Value * index2) const;
    185 
    186     llvm::Value * getStreamView(const std::string & name, llvm::Value * blockNo, llvm::Value * index) const;
     174    llvm::Value * getInputStream(const std::string & name, llvm::Value * index) const;
     175
     176    llvm::Value * getInputStream(const std::string & name, llvm::Value * index1, llvm::Value * index2) const;
     177
     178    llvm::Value * getOutputStream(const std::string & name, llvm::Value * index) const;
     179
     180    llvm::Value * getOutputStream(const std::string & name, llvm::Value * index1, llvm::Value * index2) const;
    187181
    188182    llvm::Value * getStreamView(llvm::Type * type, const std::string & name, llvm::Value * blockNo, llvm::Value * index) const;
    189183
    190     // Stream set helpers.
     184
    191185    llvm::Value * getScalarFieldPtr(const std::string & name) const {
    192186        return getScalarFieldPtr(getSelf(), name);
     
    197191    }
    198192
    199     llvm::Value * getStreamSetBufferPtr(const std::string & name) const {
    200         return getStreamSetBufferPtr(getSelf(), name);
    201     }
    202 
    203     llvm::Value * getStreamSetBufferPtr(llvm::Value * index) const {
    204         return getStreamSetBufferPtr(getSelf(), index);
    205     }
    206 
    207193    llvm::Value * getStreamSetPtr(const std::string & name, llvm::Value * blockNo) const;
    208194
     
    239225
    240226private:
     227
     228    // Stream set helpers.
     229//    llvm::Value * getStream(const std::string & name, llvm::Value * blockNo, llvm::Value * index) const;
     230
     231//    llvm::Value * getStream(const std::string & name, llvm::Value * blockNo, llvm::Value * index1, llvm::Value * index2) const;
     232
     233    llvm::Value * getStreamSetBufferPtr(const std::string & name) const;
     234
     235    llvm::Value * getStreamSetBufferPtr(llvm::Value * index) const;
     236
     237    llvm::Value * getScalarFieldPtr(llvm::Value * instance, const std::string & name) const;
     238
     239    llvm::Value * getScalarFieldPtr(llvm::Value * instance, llvm::Value * index) const;
    241240
    242241    unsigned getStreamSetIndex(const std::string & name) const;
     
    291290    // Each kernel builder subtype must provide its own logic for generating
    292291    // doBlock calls.
    293     virtual void generateDoBlockMethod(llvm::Value * blockNo) = 0;
     292    virtual void generateDoBlockMethod() = 0;
    294293
    295294    // Each kernel builder subtype must also specify the logic for processing the
     
    300299    // not be overridden.
    301300
    302     virtual void generateFinalBlockMethod(llvm::Value * remainingBytes, llvm::Value * blockNo);
     301    virtual void generateFinalBlockMethod(llvm::Value * remainingBytes);
    303302
    304303    virtual void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) final;
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5292 r5297  
    4040}
    4141               
    42 void P2SKernel::generateDoBlockMethod(llvm::Value * blockNo) {
     42void P2SKernel::generateDoBlockMethod() {
    4343    Value * p_bitblock[8];
    4444    for (unsigned i = 0; i < 8; i++) {
    45         Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(i));
     45        Value * ptr = getInputStream("basisBits", iBuilder->getInt32(i));
    4646        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(ptr);
    4747    }
     
    4949    p2s(iBuilder, p_bitblock, s_bytepack);
    5050    for (unsigned j = 0; j < 8; ++j) {
    51         Value * ptr = getStream("byteStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j));
     51        Value * ptr = getOutputStream("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j));
    5252        iBuilder->CreateBlockAlignedStore(s_bytepack[j], ptr);
    5353    }
     
    6363   
    6464
    65 void P2SKernelWithCompressedOutput::generateDoBlockMethod(llvm::Value * blockNo) {
     65void P2SKernelWithCompressedOutput::generateDoBlockMethod() {
    6666    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    6767    IntegerType * i32 = iBuilder->getInt32Ty();
     
    7070    Value * basisBits[8];
    7171    for (unsigned i = 0; i < 8; i++) {
    72         Value * basisBitsBlock_ptr = getStream("basisBits", blockNo, iBuilder->getInt32(i));
     72        Value * basisBitsBlock_ptr = getInputStream("basisBits", iBuilder->getInt32(i));
    7373        basisBits[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr);
    7474    }
     
    7777
    7878    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
    79     Value * delCountBlock_ptr = getStream("deletionCounts", blockNo, iBuilder->getInt32(0));
     79    Value * delCountBlock_ptr = getInputStream("deletionCounts", iBuilder->getInt32(0));
    8080    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
    8181
    8282    Value * unitsGenerated = getProducedItemCount("byteStream"); // units generated to buffer
    83     Value * output_ptr = getStreamView(i8PtrTy, "byteStream", blockNo, iBuilder->getInt32(0));
     83    Value * output_ptr = getStreamView(i8PtrTy, "byteStream", getBlockNo(), iBuilder->getInt32(0));
    8484    Value * offset = iBuilder->getInt32(0);
    8585    for (unsigned j = 0; j < 8; ++j) {
    86         iBuilder->CreateAlignedStore(bytePack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy), 1);
     86        iBuilder->CreateStore(bytePack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy));
    8787        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
    8888    }
     
    101101   
    102102
    103 void P2S16Kernel::generateDoBlockMethod(Value * blockNo) {
     103void P2S16Kernel::generateDoBlockMethod() {
    104104    Value * hi_input[8];
    105105    for (unsigned j = 0; j < 8; ++j) {
    106         Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j));
     106        Value * ptr = getInputStream("basisBits", iBuilder->getInt32(0), iBuilder->getInt32(j));
    107107        hi_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
    108108    }
     
    111111    Value * lo_input[8];
    112112    for (unsigned j = 0; j < 8; ++j) {
    113         Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j + 8));
     113        Value * ptr = getInputStream("basisBits", iBuilder->getInt32(0), iBuilder->getInt32(j + 8));
    114114        lo_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
    115115    }
     
    119119        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
    120120        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    121         Value * ptr0 = getStream("i16Stream", blockNo, iBuilder->getInt32(2 * j));
     121        Value * ptr0 = getOutputStream("i16Stream", iBuilder->getInt32(2 * j));
    122122        iBuilder->CreateBlockAlignedStore(merge0, ptr0);
    123         Value * ptr1 = getStream("i16Stream", blockNo, iBuilder->getInt32(2 * j + 1));
     123        Value * ptr1 = getOutputStream("i16Stream", iBuilder->getInt32(2 * j + 1));
    124124        iBuilder->CreateBlockAlignedStore(merge1, ptr1);
    125125    }
     
    136136
    137137   
    138 void P2S16KernelWithCompressedOutput::generateDoBlockMethod(Value * blockNo) {
     138void P2S16KernelWithCompressedOutput::generateDoBlockMethod() {
    139139    IntegerType * i32Ty = iBuilder->getInt32Ty();
    140140    PointerType * bitBlockPtrTy = iBuilder->getBitBlockType()->getPointerTo();
     141
    141142    Value * hi_input[8];
    142143    for (unsigned j = 0; j < 8; ++j) {
    143         Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(j));
     144        Value * ptr = getInputStream("basisBits", iBuilder->getInt32(j));
    144145        hi_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
    145146    }
    146147    Value * hi_bytes[8];
    147148    p2s(iBuilder, hi_input, hi_bytes);
     149
    148150    Value * lo_input[8];
    149151    for (unsigned j = 0; j < 8; ++j) {
    150         Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(j + 8));
     152        Value * ptr = getInputStream("basisBits", iBuilder->getInt32(j + 8));
    151153        lo_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
    152154    }
    153155    Value * lo_bytes[8];
    154156    p2s(iBuilder, lo_input, lo_bytes);
    155     Value * delCountBlock_ptr = getStream("deletionCounts", blockNo, iBuilder->getInt32(0));
     157
     158    Value * delCountBlock_ptr = getInputStream("deletionCounts", iBuilder->getInt32(0));
    156159    Value * unit_counts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
    157160    PointerType * int16PtrTy = PointerType::get(iBuilder->getInt16Ty(), 0);
     161
    158162    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
    159163    Value * i16UnitsGenerated = getProducedItemCount("i16Stream"); // units generated to buffer
    160164    Value * i16BlockNo = iBuilder->CreateUDiv(i16UnitsGenerated, stride);
    161165    Value * u16_output_ptr = getStreamView(int16PtrTy, "i16Stream", i16BlockNo, iBuilder->CreateURem(i16UnitsGenerated, stride));
     166
    162167    Value * offset = ConstantInt::get(i32Ty, 0);
     168
    163169    for (unsigned j = 0; j < 8; ++j) {
    164170        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
    165         Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    166171        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
    167172        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j)), i32Ty);
     173
     174        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    168175        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
    169176        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j + 1)), i32Ty);
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r5292 r5297  
    1616    P2SKernel(IDISA::IDISA_Builder * iBuilder);
    1717private:
    18     void generateDoBlockMethod(llvm::Value * blockNo) override;
     18    void generateDoBlockMethod() override;
    1919};
    2020
     
    2323    P2SKernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder);   
    2424private:
    25     void generateDoBlockMethod(llvm::Value * blockNo) override;
     25    void generateDoBlockMethod() override;
    2626};
    2727
     
    3030    P2S16Kernel(IDISA::IDISA_Builder * iBuilder);   
    3131private:
    32     void generateDoBlockMethod(llvm::Value * blockNo) override;
     32    void generateDoBlockMethod() override;
    3333};
    3434   
     
    3737    P2S16KernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder);
    3838private:
    39     void generateDoBlockMethod(llvm::Value * blockNo) override;
     39    void generateDoBlockMethod() override;
    4040};
    4141   
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5292 r5297  
    100100    Value * loopItemsToDo = iBuilder->CreateSub(itemsAvail, excessItems);
    101101
    102     Value * blockNo = getBlockNo();
    103 
    104102    // A block is made up of 8 packs.  Get the pointer to the first pack (changes the type of the pointer only).
    105     Value * sourcePackPtr = getStream("sourceStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(0));
    106 
    107     Value * outputGenerated = getProducedItemCount("expandedStream"); // bytes previously generated to output
    108     Value * outputBlockNo = iBuilder->CreateUDiv(outputGenerated, stride);
    109     Value * outputPackPtr = getStream("expandedStream", outputBlockNo, iBuilder->getInt32(0), iBuilder->getInt32(0));
     103    Value * sourcePackPtr = getInputStream("sourceStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
     104    Value * outputPackPtr = getOutputStream("expandedStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
    110105
    111106    Value * hasFullLoop = iBuilder->CreateICmpUGE(loopItemsToDo, triplePackSize);
     
    124119    Value * pack0 = iBuilder->fwCast(8, iBuilder->CreateAlignedLoad(loopInput_ptr, packAlign));
    125120    Value * expand0 = iBuilder->bitCast(iBuilder->CreateShuffleVector(undefPack, pack0, expand_3_4_shuffle[0]));
    126     iBuilder->CreateAlignedStore(expand0, loopOutput_ptr, packAlign);
     121    iBuilder->CreateBlockAlignedStore(expand0, loopOutput_ptr);
    127122    // Step 2 of the main loop.
    128123    Value * inPack1_ptr = iBuilder->CreateGEP(loopInput_ptr, iBuilder->getInt32(1));
     
    130125    Value * pack1 = iBuilder->fwCast(8, iBuilder->CreateAlignedLoad(inPack1_ptr, packAlign));
    131126    Value * expand1 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack0, pack1, expand_3_4_shuffle[1]));
    132     iBuilder->CreateAlignedStore(expand1, outPack1_ptr, packAlign);
     127    iBuilder->CreateBlockAlignedStore(expand1, outPack1_ptr);
    133128    // Step 3 of the main loop.
    134129    Value * inPack2_ptr = iBuilder->CreateGEP(loopInput_ptr, iBuilder->getInt32(2));
     
    136131    Value * pack2 = iBuilder->fwCast(8, iBuilder->CreateAlignedLoad(inPack2_ptr, packAlign));
    137132    Value * expand2 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack1, pack2, expand_3_4_shuffle[2]));
    138     iBuilder->CreateAlignedStore(expand2, outPack2_ptr, packAlign);
     133    iBuilder->CreateBlockAlignedStore(expand2, outPack2_ptr);
    139134    Value * outPack3_ptr = iBuilder->CreateGEP(loopOutput_ptr, iBuilder->getInt32(3));
    140135    Value * expand3 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack2, undefPack, expand_3_4_shuffle[3]));
    141     iBuilder->CreateAlignedStore(expand3, outPack3_ptr, packAlign);
     136    iBuilder->CreateBlockAlignedStore(expand3, outPack3_ptr);
    142137
    143138    Value * loopNextInputPack = iBuilder->CreateGEP(loopInput_ptr, iBuilder->getInt32(3));
     
    262257//                                   ba    bits to move 12 positions left
    263258//    xwvuts|  nlkjzy|  barqpm|  hgfedc    Target
    264 Value* radix64Kernel::processPackData(llvm::Value* bytepack) const {
     259inline Value * radix64Kernel::processPackData(llvm::Value * bytepack) const {
     260
    265261    Value * step_right_6 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00C00000));
     262    Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
     263
    266264    Value * step_left_8 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x003F0000));
     265    Value * left_8_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_8), 8);
     266    Value * mid = iBuilder->simd_or(right_6_result, left_8_result);
     267
    267268    Value * step_right_4 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x0000F000));
     269    Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
     270    mid = iBuilder->simd_or(mid, right_4_result);
     271
    268272    Value * step_left_10 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000F00));
     273    Value * left_10_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_10), 10);
     274    mid = iBuilder->simd_or(mid, left_10_result);
     275
    269276    Value * step_right_2 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x000000FC));
     277    Value * right_2_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_2), 2);
     278    mid = iBuilder->simd_or(mid, right_2_result);
     279
    270280    Value * step_left_12 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000003));
    271 
    272     Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
    273     Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
    274     Value * right_2_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_2), 2);
    275     Value * left_8_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_8), 8);
    276     Value * left_10_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_10), 10);
    277281    Value * left_12_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_12), 12);
    278 
    279     Value * mid = right_6_result;
    280     mid = iBuilder->simd_or(mid, right_4_result);
    281     mid = iBuilder->simd_or(mid, right_2_result);
    282     mid = iBuilder->simd_or(mid, left_8_result);
    283     mid = iBuilder->simd_or(mid, left_10_result);
    284282    mid = iBuilder->simd_or(mid, left_12_result);
    285     Value * radix64pack = iBuilder->bitCast(mid);
    286     return radix64pack;
    287 }
    288 
    289 void radix64Kernel::generateDoBlockMethod(Value * blockNo) {
     283
     284    return iBuilder->bitCast(mid);
     285}
     286
     287void radix64Kernel::generateDoBlockMethod() {
    290288    for (unsigned i = 0; i < 8; i++) {
    291         Value * expandedStream = getStream("expandedStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     289        Value * expandedStream = getInputStream("expandedStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    292290        Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStream);
    293291        Value * radix64pack = processPackData(bytepack);
    294         Value * radix64stream = getStream("radix64stream",blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     292        Value * radix64stream = getOutputStream("radix64stream",iBuilder->getInt32(0), iBuilder->getInt32(i));
    295293        iBuilder->CreateBlockAlignedStore(radix64pack, radix64stream);
    296294    }
     
    300298}
    301299
    302 void radix64Kernel::generateFinalBlockMethod(Value * remainingBytes, Value * blockNo) {
     300void radix64Kernel::generateFinalBlockMethod(Value * remainingBytes) {
    303301
    304302    BasicBlock * entry = iBuilder->GetInsertBlock();
     
    321319    loopRemain->addIncoming(remainingBytes, entry);
    322320
    323     Value * expandedStreamLoopPtr = getStream("expandedStream", blockNo, iBuilder->getInt32(0), idx);
     321    Value * expandedStreamLoopPtr = getInputStream("expandedStream", iBuilder->getInt32(0), idx);
    324322    Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStreamLoopPtr);
    325323    Value * radix64pack = processPackData(bytepack);
    326324
    327     Value * radix64streamPtr = getStream("radix64stream", blockNo, iBuilder->getInt32(0), idx);
     325    Value * radix64streamPtr = getOutputStream("radix64stream", iBuilder->getInt32(0), idx);
    328326    iBuilder->CreateBlockAlignedStore(radix64pack, radix64streamPtr);
    329327
     
    347345}
    348346
    349 llvm::Value* base64Kernel::processPackData(llvm::Value* bytepack) const {
     347inline llvm::Value* base64Kernel::processPackData(llvm::Value* bytepack) const {
    350348    Value * mask_gt_25 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(25)));
    351349    Value * mask_gt_51 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(51)));
     
    362360    Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, iBuilder->getInt8(75))));
    363361    Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, iBuilder->getInt8(15))));
    364     Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, iBuilder->getInt8(12))));
    365     return base64pack;
    366 }
    367 
    368 void base64Kernel::generateDoBlockMethod(Value * blockNo) {
     362    return iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, iBuilder->getInt8(12))));
     363}
     364
     365void base64Kernel::generateDoBlockMethod() {
    369366    for (unsigned i = 0; i < 8; i++) {
    370         Value * radix64stream_ptr = getStream("radix64stream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     367        Value * radix64stream_ptr = getInputStream("radix64stream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    371368        Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64stream_ptr);
    372 
    373369        Value* base64pack = processPackData(bytepack);
    374 
    375         Value * base64stream_ptr = getStream("base64stream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     370        Value * base64stream_ptr = getOutputStream("base64stream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    376371        iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64stream_ptr);
    377372    }
     
    384379// of 4 bytes.   When the number of radix 64 values is not a multiple of 4
    385380// number of radix 64 values
    386 void base64Kernel::generateFinalBlockMethod(Value * remainingBytes, Value * blockNo) {
     381void base64Kernel::generateFinalBlockMethod(Value * remainingBytes) {
    387382
    388383    BasicBlock * entry = iBuilder->GetInsertBlock();
     
    407402    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
    408403    loopRemain->addIncoming(remainingBytes, entry);
    409     Value * radix64streamPtr = getStream("radix64stream", blockNo, iBuilder->getInt32(0), idx);
     404    Value * radix64streamPtr = getInputStream("radix64stream", iBuilder->getInt32(0), idx);
    410405    Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64streamPtr);
    411406    Value * base64pack = processPackData(bytepack);
    412     Value * base64streamPtr = getStream("base64stream", blockNo, iBuilder->getInt32(0), idx);
     407    Value * base64streamPtr = getOutputStream("base64stream", iBuilder->getInt32(0), idx);
    413408    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64streamPtr);
    414409    idx->addIncoming(iBuilder->CreateAdd(idx, ConstantInt::get(iBuilder->getInt32Ty(), 1)), base64_loop);
     
    423418
    424419    iBuilder->SetInsertPoint(doPadding);
    425     Value * i8output_ptr = getStreamView(iBuilder->getInt8PtrTy(), "base64stream", blockNo, iBuilder->getInt32(0));
     420    Value * i8output_ptr = getStreamView(iBuilder->getInt8PtrTy(), "base64stream", getBlockNo(), iBuilder->getInt32(0));
    426421    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8output_ptr, remainingBytes));
    427422    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, iBuilder->getSize(3)), fbExit, doPadding2);
     
    437432expand3_4Kernel::expand3_4Kernel(IDISA::IDISA_Builder * iBuilder)
    438433: SegmentOrientedKernel(iBuilder, "expand3_4",
    439               {Binding{iBuilder->getStreamSetTy(1, 8), "sourceStream"}},
    440               {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}},
    441               {}, {}, {}) {
     434            {Binding{iBuilder->getStreamSetTy(1, 8), "sourceStream"}},
     435            {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}},
     436            {}, {}, {}) {
    442437    setDoBlockUpdatesProducedItemCountsAttribute(true);
    443438}
    444439
    445440radix64Kernel::radix64Kernel(IDISA::IDISA_Builder * iBuilder)
    446 : BlockOrientedKernel(iBuilder, "radix64", {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}}, {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}}, {}, {}, {}) {
     441: BlockOrientedKernel(iBuilder, "radix64",
     442            {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}},
     443            {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}},
     444            {}, {}, {}) {
    447445    setDoBlockUpdatesProducedItemCountsAttribute(true);
    448446}
    449447
    450448base64Kernel::base64Kernel(IDISA::IDISA_Builder * iBuilder)
    451 : BlockOrientedKernel(iBuilder, "base64", {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}}, {Binding{iBuilder->getStreamSetTy(1, 8), "base64stream"}}, {}, {}, {}) {
     449: BlockOrientedKernel(iBuilder, "base64",
     450            {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}},
     451            {Binding{iBuilder->getStreamSetTy(1, 8), "base64stream"}},
     452            {}, {}, {}) {
    452453    setDoBlockUpdatesProducedItemCountsAttribute(true);
    453454}
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5292 r5297  
    3030    radix64Kernel(IDISA::IDISA_Builder * iBuilder);
    3131private:
    32     virtual void generateDoBlockMethod(llvm::Value * blockNo) override final;
    33     virtual void generateFinalBlockMethod(llvm::Value * remainingBytes, llvm::Value * blockNo) override final;
     32    virtual void generateDoBlockMethod() override final;
     33    virtual void generateFinalBlockMethod(llvm::Value * remainingBytes) override final;
    3434    llvm::Value* processPackData(llvm::Value* packData) const;
    3535};
     
    3939    base64Kernel(IDISA::IDISA_Builder * iBuilder);
    4040private:
    41     virtual void generateDoBlockMethod(llvm::Value * blockNo) override final;
    42     virtual void generateFinalBlockMethod(llvm::Value * remainingBytes, llvm::Value * blockNo) override final;
     41    virtual void generateDoBlockMethod() override final;
     42    virtual void generateFinalBlockMethod(llvm::Value * remainingBytes) override final;
    4343    llvm::Value* processPackData(llvm::Value* packData) const;
    4444};
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5292 r5297  
    88#include <llvm/IR/Constant.h>      // for Constant
    99#include <llvm/IR/Module.h>
     10#include <llvm/Support/raw_ostream.h>
    1011namespace llvm { class BasicBlock; }
    1112namespace llvm { class Function; }
     
    121122#endif
    122123   
    123 void S2PKernel::generateDoBlockMethod(llvm::Value * blockNo) {
     124void S2PKernel::generateDoBlockMethod() {
    124125    Value * bytepack[8];
    125126    for (unsigned i = 0; i < 8; i++) {
    126         Value * byteStream = getStream("byteStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     127        Value * byteStream = getInputStream("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    127128        bytepack[i] = iBuilder->CreateBlockAlignedLoad(byteStream);
    128129    }
     
    130131    s2p(iBuilder, bytepack, basisbits);
    131132    for (unsigned i = 0; i < 8; ++i) {
    132         Value * basisBits = getStream("basisBits", blockNo, iBuilder->getInt32(i));
     133        Value * basisBits = getOutputStream("basisBits", iBuilder->getInt32(i));
    133134        iBuilder->CreateBlockAlignedStore(basisbits[i], basisBits);
    134135    }
    135136}
    136137
    137 void S2PKernel::generateFinalBlockMethod(Value * remainingBytes, llvm::Value * blockNo) {
     138void S2PKernel::generateFinalBlockMethod(Value * remainingBytes) {
    138139    /* Prepare the s2p final block function:
    139140     assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
     
    153154   
    154155    iBuilder->SetInsertPoint(finalEmptyBlock);
    155     Value * basisBitsPtr = getStreamView("basisBits", blockNo, iBuilder->getInt64(0));
    156     iBuilder->CreateStore(Constant::getNullValue(basisBitsPtr->getType()->getPointerElementType()), basisBitsPtr);
     156
     157    for (unsigned i = 0; i < 8; ++i) {
     158        Value * basisBitsPtr = getOutputStream("basisBits", iBuilder->getInt64(i));
     159        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(iBuilder->getBitBlockType()), basisBitsPtr);
     160    }
     161
    157162    iBuilder->CreateBr(exitBlock);
    158163   
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r5292 r5297  
    1717    virtual ~S2PKernel() {}
    1818private:
    19     void generateDoBlockMethod(llvm::Value * blockNo) override;
    20     void generateFinalBlockMethod(llvm::Value * remainingBytes, llvm::Value * blockNo) override;
     19    void generateDoBlockMethod() override;
     20    void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
    2121};
    2222
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5292 r5297  
    3838}
    3939       
    40 void ScanMatchKernel::generateDoBlockMethod(Value * blockNo) {
     40void ScanMatchKernel::generateDoBlockMethod() {
    4141
    4242    auto savePoint = iBuilder->saveIP();
     
    4747    const unsigned fieldCount = iBuilder->getBitBlockWidth() / T->getBitWidth();
    4848    Type * scanwordVectorType =  VectorType::get(T, fieldCount);
     49    Value * blockNo = getBlockNo();
    4950    Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));   
    5051    Value * recordStart = getScalarField("LineStart");
    5152    Value * recordNum = getScalarField("LineNum");
    52     Value * matches = iBuilder->CreateBlockAlignedLoad(getStream("matchResults", blockNo, iBuilder->getInt32(0)));
    53     Value * linebreaks = iBuilder->CreateBlockAlignedLoad(getStream("matchResults", blockNo, iBuilder->getInt32(1)));
     53    Value * matches = iBuilder->CreateBlockAlignedLoad(getInputStream("matchResults", iBuilder->getInt32(0)));
     54    Value * linebreaks = iBuilder->CreateBlockAlignedLoad(getInputStream("matchResults", iBuilder->getInt32(1)));
    5455    Value * matchWordVector = iBuilder->CreateBitCast(matches, scanwordVectorType);
    5556    Value * breakWordVector = iBuilder->CreateBitCast(linebreaks, scanwordVectorType);
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5292 r5297  
    1818    ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType);
    1919protected:
    20     void generateDoBlockMethod(llvm::Value * blockNo) override;
     20    void generateDoBlockMethod() override;
    2121private:
    2222    llvm::Function * generateScanWordRoutine(llvm::Module * m) const;
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5283 r5297  
    2323using namespace IDISA;
    2424
    25 Type * StreamSetBuffer::resolveStreamTypes(Type * type) {
     25Type * StreamSetBuffer::resolveStreamSetBufferType(Type * type) const {
    2626    if (auto ty = dyn_cast<ArrayType>(type)) {
    2727        unsigned numElems = ty->getNumElements();
     
    3838
    3939void StreamSetBuffer::allocateBuffer() {
    40     mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetType, iBuilder->getSize(mBufferBlocks));
     40    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferSize));
    4141}
    4242
     
    4747Value * StreamSetBuffer::getStream(Value * self, Value * blockNo, Value * index1, Value * index2) const {
    4848    return iBuilder->CreateGEP(getStreamSetPtr(self, blockNo), {iBuilder->getInt32(0), index1, index2});
    49 }
    50 
    51 Value * StreamSetBuffer::getStreamView(llvm::Value * self, Value * blockNo, llvm::Value * index) const {
    52     return iBuilder->CreateGEP(getStreamSetPtr(self, blockNo), index, "view");
    5349}
    5450
     
    6561
    6662// External File Buffer
    67 void ExternalFileBuffer::setStreamSetBuffer(Value * ptr, Value * fileSize) {
    68    
    69     PointerType * t = getStreamBufferPointerType();   
    70     mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, t);
     63void ExternalFileBuffer::setStreamSetBuffer(Value * ptr, Value * /* fileSize */) {
     64    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, getPointerType());
    7165}
    7266
    73 void ExternalFileBuffer::setEmptyBuffer(Value * ptr) {
    74    
    75     PointerType * t = getStreamBufferPointerType();   
    76     mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, t);
     67void ExternalFileBuffer::setEmptyBuffer(Value * ptr) {   
     68    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, getPointerType());
    7769}
    7870
     
    9183
    9284    Value * offset = nullptr;
    93     if (mBufferBlocks == 1) {
     85    if (mBufferSize == 1) {
    9486        offset = ConstantInt::getNullValue(iBuilder->getSizeTy());
    95     } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
    96         offset = iBuilder->CreateAnd(blockNo, ConstantInt::get(blockNo->getType(), mBufferBlocks - 1));
     87    } else if ((mBufferSize & (mBufferSize - 1)) == 0) { // is power of 2
     88        offset = iBuilder->CreateAnd(blockNo, ConstantInt::get(blockNo->getType(), mBufferSize - 1));
    9789    } else {
    98         offset = iBuilder->CreateURem(blockNo, ConstantInt::get(blockNo->getType(), mBufferBlocks));
     90        offset = iBuilder->CreateURem(blockNo, ConstantInt::get(blockNo->getType(), mBufferSize));
    9991    }
    10092    return iBuilder->CreateGEP(self, offset);
     
    10597Value * LinearCopybackBuffer::getStreamSetPtr(Value * self, Value * blockNo) const {
    10698    Value * offset = nullptr;
    107     if (mBufferBlocks == 1) {
     99    if (mBufferSize == 1) {
    108100        offset = ConstantInt::getNullValue(iBuilder->getSizeTy());
    109     } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
    110         offset = iBuilder->CreateAnd(blockNo, ConstantInt::get(blockNo->getType(), mBufferBlocks - 1));
     101    } else if ((mBufferSize & (mBufferSize - 1)) == 0) { // is power of 2
     102        offset = iBuilder->CreateAnd(blockNo, ConstantInt::get(blockNo->getType(), mBufferSize - 1));
    111103    } else {
    112         offset = iBuilder->CreateURem(blockNo, ConstantInt::get(blockNo->getType(), mBufferBlocks));
     104        offset = iBuilder->CreateURem(blockNo, ConstantInt::get(blockNo->getType(), mBufferSize));
    113105    }
    114106    return iBuilder->CreateGEP(self, offset);
     
    130122}
    131123
    132 llvm::Value * ExpandableBuffer::getStreamView(llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const {
    133     return nullptr;
    134 }
    135 
    136124llvm::Value * ExpandableBuffer::getStreamView(llvm::Type * type, llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const {
    137125    return nullptr;
    138126}
    139 
    140127
    141128// Constructors
     
    169156: mBufferKind(k)
    170157, iBuilder(b)
    171 , mStreamSetType(resolveStreamTypes(type))
    172 , mBufferBlocks(blocks)
    173 , mAddrSpace(AddressSpace)
    174 , mStreamSetBufferPtr(nullptr) {
     158, mStreamSetType(resolveStreamSetBufferType(type))
     159, mBufferSize(blocks)
     160, mAddressSpace(AddressSpace)
     161, mStreamSetBufferPtr(nullptr)
     162, mBaseStreamSetType(type) {
    175163
    176164}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5276 r5297  
    2222    enum class BufferKind : unsigned {BlockBuffer, ExternalFileBuffer, CircularBuffer, LinearCopybackBuffer, ExpandableBuffer};
    2323
    24     inline BufferKind getBufferKind() const {
     24    BufferKind getBufferKind() const {
    2525        return mBufferKind;
    2626    }
    2727
    28     inline llvm::Type * getBufferStreamSetType() const {
     28    llvm::Type * getType() const {
    2929        return mStreamSetType;
    3030    }
    3131
    32     llvm::PointerType * getStreamBufferPointerType() const {
    33         return mStreamSetType->getPointerTo(mAddrSpace);
     32    llvm::Type * getBaseType() const {
     33        return mBaseStreamSetType;
    3434    }
    3535
    36     size_t getBufferSize() const { return mBufferBlocks; }
     36    llvm::PointerType * getPointerType() const {
     37        return getType()->getPointerTo(mAddressSpace);
     38    }
    3739
    38     llvm::Value * getStreamSetBasePtr() const { return mStreamSetBufferPtr; }
     40    size_t getBufferSize() const {
     41        return mBufferSize;
     42    }
     43
     44    llvm::Value * getStreamSetBasePtr() const {
     45        return mStreamSetBufferPtr;
     46    }
    3947
    4048    virtual void allocateBuffer();
     
    4452    virtual llvm::Value * getStream(llvm::Value * self, llvm::Value * blockNo, llvm::Value * index1, llvm::Value * index2) const;
    4553   
    46     virtual llvm::Value * getStreamView(llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const;
    47 
    4854    virtual llvm::Value * getStreamView(llvm::Type * type, llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const;
    4955
    50     llvm::Type * resolveStreamTypes(llvm::Type * type);
    51    
    5256protected:
    5357
     
    5761    virtual llvm::Value * getStreamSetPtr(llvm::Value * self, llvm::Value * blockNo) const = 0;
    5862
     63    llvm::Type * resolveStreamSetBufferType(llvm::Type * type) const;
     64
    5965protected:
    6066    const BufferKind                mBufferKind;
    6167    IDISA::IDISA_Builder * const    iBuilder;
    6268    llvm::Type * const              mStreamSetType;
    63     const size_t                    mBufferBlocks;
    64     const int                       mAddrSpace;
     69    const size_t                    mBufferSize;
     70    const unsigned                  mAddressSpace;
    6571    llvm::Value *                   mStreamSetBufferPtr;
     72    llvm::Type * const              mBaseStreamSetType;
    6673};   
    6774
     
    139146    llvm::Value * getStream(llvm::Value * self, llvm::Value * blockNo, llvm::Value * index1, llvm::Value * index2) const override;
    140147
    141     llvm::Value * getStreamView(llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const override;
    142 
    143148    llvm::Value * getStreamView(llvm::Type * type, llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const override;
    144149
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5292 r5297  
    4848}
    4949   
    50 void PabloCompiler::compile(Value * const blockNo) {
     50void PabloCompiler::compile() {
    5151
    5252    mCarryManager->initializeCodeGen();
     
    5555    mMarker.emplace(entryBlock->createZeroes(), iBuilder->allZeroes());
    5656    mMarker.emplace(entryBlock->createOnes(), iBuilder->allOnes());
     57
     58    Value * const blockNo =  mKernel->getBlockNo();
    5759
    5860    for (unsigned i = 0; i < mKernel->getNumOfInputs(); ++i) {
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r5292 r5297  
    3434    ~PabloCompiler();
    3535    void initializeKernelData();
    36     void compile(llvm::Value * const blockNo);
     36    void compile();
    3737
    3838private:
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5292 r5297  
    8787}
    8888
    89 void PabloKernel::generateDoBlockMethod(Value * blockNo) {
    90     mPabloCompiler->compile(blockNo);
     89void PabloKernel::generateDoBlockMethod() {
     90    mPabloCompiler->compile();
    9191}
    9292
    93 void PabloKernel::generateFinalBlockMethod(Value *remainingBytes, Value * blockNo) {
     93void PabloKernel::generateFinalBlockMethod(Value * remainingBytes) {
    9494    // Standard Pablo convention for final block processing: set a bit marking
    9595    // the position just past EOF, as well as a mask marking all positions past EOF.
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.h

    r5292 r5297  
    116116    void prepareKernel()  override final;
    117117
    118     void generateDoBlockMethod(llvm::Value * blockNo) override final;
     118    void generateDoBlockMethod() override final;
    119119
    120120    // The default method for Pablo final block processing sets the
    121121    // EOFmark bit and then calls the standard DoBlock function.
    122122    // This may be overridden for specialized processing.
    123     virtual void generateFinalBlockMethod(llvm::Value * remainingBytes, llvm::Value * blockNo) override final;
     123    virtual void generateFinalBlockMethod(llvm::Value * remainingBytes) override final;
    124124
    125125    inline String * makeName(const llvm::StringRef & prefix) const {
Note: See TracChangeset for help on using the changeset viewer.