Changeset 5285


Ignore:
Timestamp:
Jan 28, 2017, 3:12:03 PM (8 months ago)
Author:
nmedfort
Message:

Start of work to simplify kernel writing. Removed generateDoBlockLogic method.

Location:
icGREP/icgrep-devel/icgrep
Files:
39 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r5267 r5285  
    541541            std::string name = t.first->getName();
    542542            if (Statement * result = dyn_cast<Statement>(f->second)) {
    543                 result->setName(entry.getName(name));
     543                result->setName(entry.makeName(name));
    544544                t.second = result;
    545545            } else {
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.cpp

    r5283 r5285  
    2323}
    2424
    25 void editdCPUKernel::generateFinalBlockMethod() const {
    26     IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    27     Module * m = iBuilder->getModule();
    28     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    29     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    30     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    31     // Final Block arguments: self, remaining, then the standard DoBlock args.
    32     Function::arg_iterator args = finalBlockFunction->arg_begin();
    33     Value * self = &*(args++);
    34     Value * remaining = &*(args++);
    35     std::vector<Value *> doBlockArgs = {self};
    36     while (args != finalBlockFunction->arg_end()){
    37         doBlockArgs.push_back(&*args++);
    38     }
    39     setScalarField(self, "EOFmask", iBuilder->bitblock_mask_from(remaining));
    40     iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    41     iBuilder->CreateRetVoid();
    42     iBuilder->restoreIP(savePoint);
    43 }
    44    
    45 void editdCPUKernel::generateDoBlockMethod() const {
    46     IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    47     Module * m = iBuilder->getModule(); 
     25void editdCPUKernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
     26    auto savePoint = iBuilder->saveIP();
    4827
    4928    Type * const int32ty = iBuilder->getInt32Ty();
    5029    Type * const int8ty = iBuilder->getInt8Ty();
    5130
    52     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    53        
    54     BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0);
    55    
    56     iBuilder->SetInsertPoint(entryBlock);
    57 
    58     Value * kernelStuctParam = getParameter(doBlockFunction, "self");
    59     Value * pattStartPtr = getScalarField(kernelStuctParam, "pattStream");
    60     Value * stideCarryArr = getScalarField(kernelStuctParam, "srideCarry");
    61     Value * blockNo = getScalarField(kernelStuctParam, blockNoScalar);
     31    Value * pattStartPtr = getScalarField(self, "pattStream");
     32    Value * stideCarryArr = getScalarField(self, "srideCarry");
    6233
    6334    unsigned carryIdx = 0;
    6435
    65     std::vector<std::vector<Value *>> e(mPatternLen+1, std::vector<Value *>(mEditDistance+1));
    66     std::vector<std::vector<Value *>> adv(mPatternLen, std::vector<Value *>(mEditDistance+1));
     36    std::vector<std::vector<Value *>> e(mPatternLen + 1, std::vector<Value *>(mEditDistance + 1));
     37    std::vector<std::vector<Value *>> adv(mPatternLen, std::vector<Value *>(mEditDistance + 1));
    6738    std::vector<std::vector<int>> calculated(mPatternLen, std::vector<int>(mEditDistance + 1, 0));
    6839    Value * pattPos = iBuilder->getInt32(0);
     
    7041    Value * pattCh = iBuilder->CreateLoad(pattPtr);
    7142    Value * pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    72     Value * pattStreamPtr = getStream(kernelStuctParam, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
     43    Value * pattStreamPtr = getStream(self, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
    7344    Value * pattStream = iBuilder->CreateLoad(pattStreamPtr);
    7445    pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     
    8354        pattCh = iBuilder->CreateLoad(pattPtr);
    8455        pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    85         pattStreamPtr = getStream(kernelStuctParam, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
     56        pattStreamPtr = getStream(self, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
    8657        pattStream = iBuilder->CreateLoad(pattStreamPtr);
    8758
     
    10172    }
    10273   
    103     Value * ptr = getStream(kernelStuctParam, "ResultStream", blockNo, iBuilder->getInt32(0));
     74    Value * ptr = getStream(self, "ResultStream", blockNo, iBuilder->getInt32(0));
    10475    iBuilder->CreateStore(e[mPatternLen - 1][0], ptr);
    10576    for(unsigned j = 1; j<= mEditDistance; j++){
    106         ptr = getStream(kernelStuctParam, "ResultStream", blockNo, iBuilder->getInt32(j));
     77        ptr = getStream(self, "ResultStream", blockNo, iBuilder->getInt32(j));
    10778        iBuilder->CreateStore(iBuilder->CreateAnd(e[mPatternLen-1][j], iBuilder->CreateNot(e[mPatternLen-1][j-1])), ptr);
    10879    }
     
    11081    iBuilder->CreateRetVoid();
    11182    iBuilder->restoreIP(savePoint);
     83}
     84
     85void editdCPUKernel::generateFinalBlockMethod(Function * function, Value * self, Value * remainingBytes, Value * blockNo) const {
     86    setScalarField(self, "EOFmask", iBuilder->bitblock_mask_from(remainingBytes));
     87    iBuilder->CreateCall(getDoBlockFunction(), {self});
    11288}
    11389
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.h

    r5283 r5285  
    1616class editdCPUKernel : public BlockOrientedKernel {
    1717public:
    18    
     18
    1919    editdCPUKernel(IDISA::IDISA_Builder * b, unsigned dist, unsigned pattLen);
    2020   
    2121   
    2222private:
    23     void generateDoBlockMethod() const override;
    24     void generateFinalBlockMethod() const override;
     23    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
     24    void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const override;
    2525    void bitblock_advance_ci_co(llvm::Value * val, unsigned shift, llvm::Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<llvm::Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j) const;
    2626    unsigned mEditDistance;
  • icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.cpp

    r5283 r5285  
    1212
    1313void bitblock_advance_ci_co(IDISA::IDISA_Builder * iBuilder, Value * val, unsigned shift, Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j){   
    14     if(!calculated[i][j]){
     14    if (!calculated[i][j]) {
    1515        Value * ptr = iBuilder->CreateGEP(stideCarryArr, {iBuilder->getInt32(0), iBuilder->getInt32(carryIdx)});
    1616        Value * ci = iBuilder->CreateLoad(ptr);
     
    2020        calculated[i][j] = 1;
    2121    }
    22     return;
    2322}
    2423
    25 void editdGPUKernel::generateFinalBlockMethod() const {
    26     IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    27     Module * m = iBuilder->getModule();
    28     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    29     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    30     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    31     // Final Block arguments: self, remaining, then the standard DoBlock args.
    32     Function::arg_iterator args = finalBlockFunction->arg_begin();
    33     Value * self = &*(args++);
    34     Value * remaining = &*(args++);
    35     std::vector<Value *> doBlockArgs = {self};
    36     while (args != finalBlockFunction->arg_end()){
    37         doBlockArgs.push_back(&*args++);
    38     }
    39     setScalarField(self, "EOFmask", iBuilder->bitblock_mask_from(remaining));
    40     iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    41     iBuilder->CreateRetVoid();
    42     iBuilder->restoreIP(savePoint);
    43 }
    44    
    45 void editdGPUKernel::generateDoBlockMethod() const {
    46     IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    47     Module * m = iBuilder->getModule(); 
     24void editdGPUKernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    4825
    49     Type * const int32ty = iBuilder->getInt32Ty();
    50     Type * const int8ty = iBuilder->getInt8Ty();
    51 
    52     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    53        
    54     BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0);
    55    
    56     iBuilder->SetInsertPoint(entryBlock);
    57 
    58     Value * kernelStuctParam = getParameter(doBlockFunction, "self");
    59     Value * pattBuf = getScalarField(kernelStuctParam, "pattStream");
    60     Value * stideCarryArr = getScalarField(kernelStuctParam, "srideCarry");
    61     Value * blockNo = getScalarField(kernelStuctParam, blockNoScalar);
    62     Value * pattLen = ConstantInt::get(int32ty, mPatternLen+1);
    63     Value * pattPos = ConstantInt::get(int32ty, 0);
     26    IntegerType * const int32ty = iBuilder->getInt32Ty();
     27    IntegerType * const int8ty = iBuilder->getInt8Ty();
     28    Value * pattLen = iBuilder->getInt32(mPatternLen + 1);
     29    Value * pattPos = iBuilder->getInt32(0);
     30    Value * pattBuf = getScalarField(self, "pattStream");
     31    Value * stideCarryArr = getScalarField(self, "srideCarry");
    6432   
    6533    unsigned carryIdx = 0;
     
    6937    std::vector<std::vector<int>> calculated(mPatternLen, std::vector<int>(mEditDistance + 1, 0));
    7038
     39    Module * m = iBuilder->getModule();
    7140    Function * bidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
    7241    Value * bid = iBuilder->CreateCall(bidFunc);
     
    7544    Value * pattCh = iBuilder->CreateLoad(pattPtr);
    7645    Value * pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    77     Value * pattStreamPtr = getStream(kernelStuctParam, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
     46    Value * pattStreamPtr = getStream(self, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
    7847    Value * pattStream = iBuilder->CreateLoad(pattStreamPtr);
    7948    pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     
    8150    e[0][0] = pattStream;
    8251    for(unsigned j = 1; j <= mEditDistance; j++){
    83       e[0][j] = iBuilder->allOnes();
     52        e[0][j] = iBuilder->allOnes();
    8453    }
    85 
    86     for(unsigned i = 1; i<mPatternLen; i++){     
     54    for(unsigned i = 1; i < mPatternLen; i++){
    8755        pattPtr = iBuilder->CreateGEP(pattStartPtr, pattPos);
    8856        pattCh = iBuilder->CreateLoad(pattPtr);
    8957        pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    90         pattStreamPtr = getStream(kernelStuctParam, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
     58        pattStreamPtr = getStream(self, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
    9159        pattStream = iBuilder->CreateLoad(pattStreamPtr);
    92 
    9360        bitblock_advance_ci_co(iBuilder, e[i-1][0], 1, stideCarryArr, carryIdx++, adv, calculated, i-1, 0);
    9461        e[i][0] = iBuilder->CreateAnd(adv[i-1][0], pattStream);
     
    10168            Value * tmp3 = iBuilder->CreateOr(adv[i][j-1], e[i-1][j-1]);
    10269            e[i][j] = iBuilder->CreateOr(iBuilder->CreateOr(tmp1, tmp2), tmp3);
    103 
    10470        }
    10571        pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
    10672    }
    107 
    108     Value * ptr = getStream(kernelStuctParam, "ResultStream", blockNo, iBuilder->getInt32(0));
     73    Value * ptr = getStream(self, "ResultStream", blockNo, iBuilder->getInt32(0));
    10974    iBuilder->CreateStore(e[mPatternLen-1][0], ptr);
    11075    for(unsigned j = 1; j<= mEditDistance; j++){
    111         ptr = getStream(kernelStuctParam, "ResultStream", blockNo, iBuilder->getInt32(j));
     76        ptr = getStream(self, "ResultStream", blockNo, iBuilder->getInt32(j));
    11277        iBuilder->CreateStore(iBuilder->CreateAnd(e[mPatternLen - 1][j], iBuilder->CreateNot(e[mPatternLen - 1][j - 1])), ptr);
    11378    }
    114     iBuilder->CreateRetVoid();
    115     iBuilder->restoreIP(savePoint);
     79}
     80
     81void editdGPUKernel::generateFinalBlockMethod(Function * function, Value * self, Value * remainingBytes, Value * blockNo) const {
     82    setScalarField(self, "EOFmask", iBuilder->bitblock_mask_from(remainingBytes));
     83    iBuilder->CreateCall(getDoBlockFunction(), { self });
    11684}
    11785
     
    12391              Binding{PointerType::get(ArrayType::get(b->getBitBlockType(), pattLen * (dist + 1) * 4), 0), "srideCarry"}},
    12492              {},
    125               {Binding{b->getBitBlockType(), "EOFmask"}}),
    126 mEditDistance(dist),
    127 mPatternLen(pattLen) {
    128 setDoBlockUpdatesProducedItemCountsAttribute(false);
     93              {Binding{b->getBitBlockType(), "EOFmask"}})
     94, mEditDistance(dist)
     95, mPatternLen(pattLen) {
     96    setDoBlockUpdatesProducedItemCountsAttribute(false);
    12997}
    13098
  • icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.h

    r5283 r5285  
    2121   
    2222private:
    23     void generateDoBlockMethod() const override;
    24     void generateFinalBlockMethod() const override;
     23    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
     24    void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const override;
    2525    unsigned mEditDistance;
    2626    unsigned mPatternLen;
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.cpp

    r5283 r5285  
    1818}
    1919
    20 void editdScanKernel::generateDoBlockMethod() const {
     20void editdScanKernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    2121    auto savePoint = iBuilder->saveIP();
    22     Module * m = iBuilder->getModule();
    23     Function * scanWordFunction = generateScanWordRoutine(m);
     22    Function * scanWordFunction = generateScanWordRoutine(iBuilder->getModule());
     23    iBuilder->restoreIP(savePoint);
     24
    2425    const unsigned fieldCount = iBuilder->getBitBlockWidth() / mScanwordBitWidth;
    2526    Type * T = iBuilder->getIntNTy(mScanwordBitWidth);
    26     Type * scanwordVectorType =  VectorType::get(T, fieldCount);
    27 
    28     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    29 
    30     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    31     Value * kernelStuctParam = getParameter(doBlockFunction, "self");
    32     Value * blockNo = getScalarField(kernelStuctParam, blockNoScalar);
     27    VectorType * scanwordVectorType =  VectorType::get(T, fieldCount);
    3328    Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
    3429   
    3530    std::vector<Value * > matchWordVectors;
    36     for(unsigned d = 0; d <= mEditDistance; d++){
    37         Value * ptr = getStream(kernelStuctParam, "matchResults", blockNo, iBuilder->getInt32(d));
     31    for(unsigned d = 0; d <= mEditDistance; d++) {
     32        Value * ptr = getStream(self, "matchResults", blockNo, iBuilder->getInt32(d));
    3833        Value * matches = iBuilder->CreateBlockAlignedLoad(ptr);
    3934        matchWordVectors.push_back(iBuilder->CreateBitCast(matches, scanwordVectorType));
    4035    }
    4136   
    42     for(unsigned i = 0; i < fieldCount; ++i){       
    43         for(unsigned d = 0; d <= mEditDistance; d++){
     37    for(unsigned i = 0; i < fieldCount; ++i) {
     38        for(unsigned d = 0; d <= mEditDistance; d++) {
    4439            Value * matchWord = iBuilder->CreateExtractElement(matchWordVectors[d], ConstantInt::get(T, i));
    4540            iBuilder->CreateCall(scanWordFunction, {matchWord, iBuilder->getInt32(d), scanwordPos});
     
    4843
    4944    }
    50     iBuilder -> CreateRetVoid();
    51     iBuilder->restoreIP(savePoint);
    5245}
    5346
    5447Function * editdScanKernel::generateScanWordRoutine(Module * m) const {
    5548
    56     Type * T = iBuilder->getIntNTy(mScanwordBitWidth);
     49    IntegerType * T = iBuilder->getIntNTy(mScanwordBitWidth);
    5750
    5851    Function * scanFunc = cast<Function>(m->getOrInsertFunction("scan_word", iBuilder->getVoidTy(), T, iBuilder->getInt32Ty(), T, nullptr));
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.h

    r5283 r5285  
    1818       
    1919private:
    20     void generateDoBlockMethod() const override;
     20    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    2121    llvm::Function * generateScanWordRoutine(llvm::Module * m) const;
    2222       
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5283 r5285  
     1cc/alphabet.cpp
     2cc/alphabet.h
    13cc/cc_compiler.cpp
    24cc/cc_compiler.h
     
    2931IR_Gen/idisa_target.h
    3032IR_Gen/llvm2ptx.h
     33IR_Gen/tracegen.h
    3134kernels/cc_kernel.cpp
    3235kernels/cc_kernel.h
    3336kernels/deletion.cpp
    3437kernels/deletion.h
     38kernels/evenodd.cpp
     39kernels/evenodd.h
    3540kernels/interface.cpp
    3641kernels/interface.h
     
    4954kernels/scanmatchgen.cpp
    5055kernels/scanmatchgen.h
     56kernels/stdin_kernel.cpp
     57kernels/stdin_kernel.h
    5158kernels/stdout_kernel.cpp
    5259kernels/stdout_kernel.h
     
    216223utf8_encoder.h
    217224wc.cpp
    218 CMakeLists.txt
    219 cc/alphabet.cpp
    220 cc/alphabet.h
    221 kernels/cc_kernel.cpp
    222 kernels/cc_kernel.h
    223 kernels/deletion.cpp
    224 kernels/deletion.h
    225 kernels/interface.cpp
    226 kernels/interface.h
    227 kernels/kernel.cpp
    228 kernels/kernel.h
    229 kernels/mmap_kernel.cpp
    230 kernels/mmap_kernel.h
    231 kernels/p2s_kernel.cpp
    232 kernels/p2s_kernel.h
    233 kernels/pipeline.cpp
    234 kernels/pipeline.h
    235 kernels/radix64.cpp
    236 kernels/radix64.h
    237 kernels/s2p_kernel.cpp
    238 kernels/s2p_kernel.h
    239 kernels/scanmatchgen.cpp
    240 kernels/scanmatchgen.h
    241 kernels/stdin_kernel.cpp
    242 kernels/stdin_kernel.h
    243 kernels/stdout_kernel.cpp
    244 kernels/stdout_kernel.h
    245 kernels/streamset.cpp
    246 kernels/streamset.h
    247 kernels/evenodd.h
    248 kernels/evenodd.cpp
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.cpp

    r5283 r5285  
    1616using namespace llvm;
    1717
    18 void DirectCharacterClassKernelBuilder::generateDoBlockMethod() const {
    19     auto savePoint = iBuilder->saveIP();
    20     Module * m = iBuilder->getModule();
    21 
    22     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    23    
    24     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    25    
    26     Value * self = getParameter(doBlockFunction, "self");
    27     Value * blockNo = getScalarField(self, blockNoScalar);
    28 
     18void DirectCharacterClassKernelBuilder::generateDoBlockMethod(Function * function, Value *self, Value *blockNo) const {
    2919    unsigned packCount = 8 * mCodeUnitSize; 
    3020    unsigned codeUnitWidth = 8 * mCodeUnitSize;
     
    3424        codeUnitPack[i] = iBuilder->CreateBlockAlignedLoad(ptr);
    3525    }
    36 
    3726    for (unsigned j = 0; j < mCharClasses.size();  j++) {
    3827        Value * theCCstream = iBuilder->allZeroes();
     
    7059        iBuilder->CreateBlockAlignedStore(theCCstream, ptr);
    7160    }
    72  
    73     iBuilder->CreateRetVoid();
    74     iBuilder->restoreIP(savePoint);
    7561}
    7662
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.h

    r5283 r5285  
    2727    }
    2828   
    29     void generateDoBlockMethod() const override;
     29    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    3030
    3131private:
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5283 r5285  
    5353// Outputs: the deleted streams, plus a partial sum popcount
    5454
    55 void DeletionKernel::generateDoBlockMethod() const {
    56 
    57     auto savePoint = iBuilder->saveIP();
    58     Module * m = iBuilder->getModule();
    59 
    60     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    61 
    62     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    63 
    64     Value * self = getParameter(doBlockFunction, "self");
    65 
    66     Value * blockNo = getScalarField(self, blockNoScalar);
    67 
     55void DeletionKernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    6856    Value * delMaskPtr = getStream(self, "delMaskSet", blockNo, iBuilder->getInt32(0));
    6957    Value * delMask = iBuilder->CreateBlockAlignedLoad(delMaskPtr);
    70 
    7158    std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    72 
    7359    for (unsigned j = 0; j < mStreamCount; ++j) {
    7460        Value * inputStreamPtr = getStream(self, "inputStreamSet", blockNo, iBuilder->getInt32(j));
    7561        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamPtr);
    76 
    7762        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    78 
    7963        Value * outputStreamPtr = getStream(self, "outputStreamSet", blockNo, iBuilder->getInt32(j));
    8064        iBuilder->CreateBlockAlignedStore(output, outputStreamPtr);
     
    8367    Value * delCountPtr = getStream(self, "deletionCounts", blockNo, iBuilder->getInt32(0));
    8468    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(delCount), delCountPtr);
    85     /* Stream deletion has only been applied within fields; the actual number of data items
    86      * has not yet changed.   */
     69    // Stream deletion has only been applied within fields; the actual number of data items has not yet changed.
    8770    Value * produced = getProducedItemCount(self, "outputStreamSet");
    8871    produced = iBuilder->CreateAdd(produced, iBuilder->getSize(iBuilder->getStride()));
    8972    setProducedItemCount(self, "outputStreamSet", produced);
    9073    setProducedItemCount(self, "deletionCounts", produced);
    91     iBuilder->CreateRetVoid();
    92     iBuilder->restoreIP(savePoint);
    9374}
    9475
    95 void DeletionKernel::generateFinalBlockMethod() const {
    96     auto savePoint = iBuilder->saveIP();
    97     Module * m = iBuilder->getModule();
    98 
    99     unsigned blockSize = iBuilder->getBitBlockWidth();
    100     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    101     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    102 
    103     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalBlockFunction, 0));
    104     Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
    105     Value * self = getParameter(finalBlockFunction, "self");
    106     Value * blockNo = getScalarField(self, blockNoScalar);
    107     Value * remaining = iBuilder->CreateZExt(remainingBytes, iBuilder->getIntNTy(blockSize));
    108     Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(iBuilder->getIntNTy(blockSize)), remaining));
     76void DeletionKernel::generateFinalBlockMethod(Function * function, Value * self, Value * remainingBytes, Value * blockNo) const {
     77    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
     78    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
     79    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
    10980    Value * const delmaskPtr = getStream(self, "delMaskSet", blockNo, iBuilder->getInt32(0));
    11081    Value * const delmaskVal = iBuilder->CreateBlockAlignedLoad(delmaskPtr);
    11182    iBuilder->CreateBlockAlignedStore(iBuilder->CreateOr(EOF_del, delmaskVal), delmaskPtr);
    112     iBuilder->CreateCall(doBlockFunction, {self});
    113     /* Adjust the produced item count */
     83    iBuilder->CreateCall(getDoBlockFunction(), {self});
     84    // Adjust the produced item count
    11485    Value * produced = getProducedItemCount(self, "outputStreamSet");
    11586    produced = iBuilder->CreateSub(produced, iBuilder->getSize(iBuilder->getStride()));
     
    11788    setProducedItemCount(self, "outputStreamSet", produced);
    11889    setProducedItemCount(self, "deletionCounts", produced);
    119 
    120     iBuilder->CreateRetVoid();
    121     iBuilder->restoreIP(savePoint);
    12290}
    12391
    124 DeletionKernel::DeletionKernel(IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned streamCount) :
    125 BlockOrientedKernel(iBuilder, "del",
     92DeletionKernel::DeletionKernel(IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned streamCount)
     93: BlockOrientedKernel(iBuilder, "del",
    12694              {Binding{iBuilder->getStreamSetTy(streamCount), "inputStreamSet"},
    12795               Binding{iBuilder->getStreamSetTy(), "delMaskSet"}},
    12896              {Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet"},
    12997               Binding{iBuilder->getStreamSetTy(), "deletionCounts"}},
    130               {}, {}, {}),
    131 mDeletionFieldWidth(fw),
    132 mStreamCount(streamCount) {
     98              {}, {}, {})
     99, mDeletionFieldWidth(fw)
     100, mStreamCount(streamCount) {
    133101    mDoBlockUpdatesProducedItemCountsAttribute = true;
    134102}
  • icGREP/icgrep-devel/icgrep/kernels/deletion.h

    r5283 r5285  
    2929protected:
    3030
    31     void generateDoBlockMethod() const override;
     31    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    3232
    33     void generateFinalBlockMethod() const override;
     33    void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const override;
    3434
    3535private:
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.cpp

    r5283 r5285  
    2222namespace kernel {
    2323
    24    
    25 void EvenOddKernel::generateDoBlockLogic(Value * self, Value * blockNo) const {
     24void EvenOddKernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    2625    Value * even = iBuilder->simd_fill(64, iBuilder->getInt64(0x5555555555555555));
    2726    Value * odd = iBuilder->bitCast(iBuilder->simd_fill(8, iBuilder->getInt8(0xAA)));
     
    3130    iBuilder->CreateBlockAlignedStore(odd, oddBitsPtr);
    3231}
    33 
    34 void EvenOddKernel::generateDoBlockMethod() const {
    35     auto savePoint = iBuilder->saveIP();
    36 
    37     Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
    38    
    39     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction));
    40    
    41     Value * self = getParameter(doBlockFunction, "self");
    42     Value * blockNo = getScalarField(self, blockNoScalar);
    43    
    44     generateDoBlockLogic(self, blockNo);
    45 
    46     iBuilder->CreateRetVoid();
    47     iBuilder->restoreIP(savePoint);
    48 }
    49 
    50 void EvenOddKernel::generateFinalBlockMethod() const {
    51     auto savePoint = iBuilder->saveIP();
    52     Module * m = iBuilder->getModule();
    53     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    54     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    55    
    56     Value * self = getParameter(finalBlockFunction, "self");
    57     Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
    58     Value * blockNo = getScalarField(self, blockNoScalar);
    59     generateDoBlockLogic(self, blockNo);
    60    
    61     iBuilder->CreateRetVoid();
    62     iBuilder->restoreIP(savePoint);
    63 }
    64    
    65 
    6632
    6733EvenOddKernel::EvenOddKernel(IDISA::IDISA_Builder * builder)
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.h

    r5283 r5285  
    2121private:
    2222    void generateDoBlockLogic(llvm::Value * self, llvm::Value * blockNo) const override;   
    23     void generateDoBlockMethod() const override;
     23    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    2424    void generateFinalBlockMethod() const override;
    2525
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5267 r5285  
    2323        throw std::runtime_error("Kernel interface " + mKernelName + " not yet finalized.");
    2424    }
    25     Type * selfType = PointerType::getUnqual(mKernelStateType);
     25    PointerType * selfType = PointerType::getUnqual(mKernelStateType);
    2626
    2727    // Create the accumulator get function prototypes
     
    109109}
    110110
    111 Value * KernelInterface::createDoBlockCall(Value * self) const {
    112     Module * m = iBuilder->getModule();
    113     std::string doBlockName = mKernelName + doBlock_suffix;
    114     Function * doBlockMethod = m->getFunction(doBlockName);
    115     if (!doBlockMethod) {
    116         throw std::runtime_error("Cannot find " + doBlockName);
    117     }
    118     std::vector<Value *> args = {self};
    119     return iBuilder->CreateCall(doBlockMethod, args);
    120 }
    121 
    122 Value * KernelInterface::createFinalBlockCall(Value * self, Value * remainingBytes) const {
    123     Module * m = iBuilder->getModule();
    124     std::string finalBlockName = mKernelName + finalBlock_suffix;
    125     Function * finalBlockMethod = m->getFunction(finalBlockName);
    126     if (!finalBlockMethod) {
    127         throw std::runtime_error("Cannot find " + finalBlockName);
    128     }
    129     std::vector<Value *> args = {self, remainingBytes};
    130     return iBuilder->CreateCall(finalBlockMethod, args);
    131 }
    132 
    133 
    134111Value * KernelInterface::createDoSegmentCall(std::vector<Value *> args) const {
    135112    Module * m = iBuilder->getModule();
     
    152129}
    153130
    154 
     131Function * KernelInterface::getDoSegmentFunction() const {
     132    return iBuilder->getModule()->getFunction(mKernelName + doSegment_suffix);
     133}
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5283 r5285  
    1010#include <vector>  // for vector
    1111namespace IDISA { class IDISA_Builder; }
     12namespace llvm { class Function; }
    1213namespace llvm { class Module; }
    1314namespace llvm { class StructType; }
     
    4344       
    4445    const std::vector<Binding> & getStreamInputs() const {return mStreamSetInputs;}
     46
    4547    const std::vector<Binding> & getStreamOutputs() const {return mStreamSetOutputs;}
     48
    4649    const std::vector<Binding> & getScalarInputs() const { return mScalarInputs;}
     50
    4751    const std::vector<Binding> & getScalarOutputs() const { return mScalarOutputs;}
    4852   
     
    5054    // Add ExternalLinkage method declarations for the kernel to a given client module.
    5155    void addKernelDeclarations(llvm::Module * client) const;
     56
    5257    virtual void createInstance() = 0;
     58
    5359    void setInitialArguments(std::vector<llvm::Value *> args);
     60
    5461    llvm::Value * getInstance() const { return mKernelInstance; }
    5562
    5663    llvm::Value * createDoSegmentCall(std::vector<llvm::Value *> args) const;
    57     llvm::Value * createFinalBlockCall(llvm::Value * self, llvm::Value * remainingBytes) const;
     64
    5865    llvm::Value * createGetAccumulatorCall(llvm::Value * self, std::string accumName) const;
    5966   
     
    6774
    6875    virtual llvm::Value * getProcessedItemCount(llvm::Value * self, const std::string & ssName) const = 0;
     76
    6977    virtual llvm::Value * getProducedItemCount(llvm::Value * self, const std::string & ssName) const = 0;
     78
    7079    virtual llvm::Value * getTerminationSignal(llvm::Value * self) const = 0;
    7180   
     
    7483    }
    7584
    76     llvm::Value * createDoBlockCall(llvm::Value * self) const;
     85    llvm::Function * getDoSegmentFunction() const;
    7786
    7887protected:
     
    113122   
    114123};
     124
    115125#endif
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5283 r5285  
    2121namespace llvm { class Type; }
    2222
     23const std::string blockNoScalar = "blockNo";
     24
    2325using namespace llvm;
    2426using namespace kernel;
     
    149151}
    150152
     153ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
     154    const auto f = mKernelMap.find(name);
     155    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
     156        llvm::report_fatal_error("Kernel does not contain scalar: " + name);
     157    }
     158    return iBuilder->getInt32(f->second);
     159}
     160
     161unsigned KernelBuilder::getScalarCount() const {
     162    return mKernelFields.size();
     163}
     164
     165Value * KernelBuilder::getScalarFieldPtr(Value * self, const std::string & fieldName) const {
     166    return getScalarFieldPtr(self, getScalarIndex(fieldName));
     167}
     168
     169Value * KernelBuilder::getScalarFieldPtr(Value * self, Value * index) const {
     170    return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), index});
     171}
     172
     173Value * KernelBuilder::getScalarField(Value * self, const std::string & fieldName) const {
     174    return iBuilder->CreateLoad(getScalarFieldPtr(self, fieldName));
     175}
     176
     177Value * KernelBuilder::getScalarField(Value * self, Value * index) const {
     178    return iBuilder->CreateLoad(getScalarFieldPtr(self, index));
     179}
     180
     181void KernelBuilder::setScalarField(Value * self, const std::string & fieldName, Value * value) const {
     182    iBuilder->CreateStore(value, getScalarFieldPtr(self, fieldName));
     183}
     184
     185void KernelBuilder::setScalarField(Value * self, Value * index, Value * value) const {
     186    iBuilder->CreateStore(value, getScalarFieldPtr(self, index));
     187}
     188
     189LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * self) const {
     190    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
     191    return iBuilder->CreateAtomicLoadAcquire(ptr);
     192}
     193
     194Value * KernelBuilder::getProcessedItemCount(Value * self, const std::string & ssName) const {
     195    return getScalarField(self, ssName + processedItemCountSuffix);
     196}
     197
     198Value * KernelBuilder::getProducedItemCount(Value * self, const std::string & ssName) const {
     199    return getScalarField(self, ssName + producedItemCountSuffix);
     200}
     201
     202Value * KernelBuilder::getTerminationSignal(Value * self) const {
     203    return getScalarField(self, terminationSignal);
     204}
     205
     206void KernelBuilder::releaseLogicalSegmentNo(Value * self, Value * newCount) const {
     207    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
     208    iBuilder->CreateAtomicStoreRelease(newCount, ptr);
     209}
     210
     211void KernelBuilder::setProcessedItemCount(Value * self, const std::string & name, Value * value) const {
     212    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(name + processedItemCountSuffix)});
     213    iBuilder->CreateStore(value, ptr);
     214}
     215
     216void KernelBuilder::setProducedItemCount(Value * self, const std::string & name, Value * value) const {
     217    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(name + producedItemCountSuffix)});
     218    iBuilder->CreateStore(value, ptr);
     219}
     220
     221void KernelBuilder::setTerminationSignal(Value * self) const {
     222    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
     223    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
     224}
     225
     226Value * KernelBuilder::getBlockNo(Value * self) const {
     227    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
     228    return iBuilder->CreateLoad(ptr);
     229}
     230
     231void KernelBuilder::setBlockNo(Value * self, Value * value) const {
     232    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
     233    iBuilder->CreateStore(value, ptr);
     234}
     235
     236
     237Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
     238    for (auto & arg : f->getArgumentList()) {
     239        if (arg.getName().equals(name)) {
     240            return &arg;
     241        }
     242    }
     243    llvm::report_fatal_error(f->getName() + " does not have parameter " + name);
     244}
     245
     246unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
     247    const auto f = mStreamSetNameMap.find(name);
     248    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
     249        llvm::report_fatal_error("Kernel " + getName() + " does not contain stream set: " + name);
     250    }
     251    return f->second;
     252}
     253
     254Value * KernelBuilder::getStreamSetBufferPtr(Value * self, const std::string & name) const {
     255    return getScalarField(self, name + bufferPtrSuffix);
     256}
     257
     258inline const StreamSetBuffer * KernelBuilder::getStreamSetBuffer(const std::string & name) const {
     259    const unsigned structIdx = getStreamSetIndex(name);
     260    if (structIdx < mStreamSetInputs.size()) {
     261        return mStreamSetInputBuffers[structIdx];
     262    } else {
     263        return mStreamSetOutputBuffers[structIdx - mStreamSetInputs.size()];
     264    }
     265}
     266
     267Value * KernelBuilder::getStreamSetPtr(Value * self, const std::string & name, Value * blockNo) const {
     268    return getStreamSetBuffer(name)->getStreamSetPtr(getStreamSetBufferPtr(self, name), blockNo);
     269}
     270
     271Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index) const {
     272    return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(self, name), blockNo, index);
     273}
     274
     275Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index1, Value * index2) const {
     276    assert (index1->getType() == index2->getType());
     277    return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(self, name), blockNo, index1, index2);
     278}
     279
     280Value * KernelBuilder::getStreamView(Value * self, const std::string & name, Value * blockNo, Value * index) const {
     281    return getStreamSetBuffer(name)->getStreamView(getStreamSetBufferPtr(self, name), blockNo, index);
     282}
     283
     284Value * KernelBuilder::getStreamView(llvm::Type * type, Value * self, const std::string & name, Value * blockNo, Value * index) const {
     285    return getStreamSetBuffer(name)->getStreamView(type, getStreamSetBufferPtr(self, name), blockNo, index);
     286}
     287
     288void KernelBuilder::createInstance() {
     289    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
     290        llvm::report_fatal_error("Cannot create kernel instance before calling prepareKernel()");
     291    }
     292    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
     293    Module * m = iBuilder->getModule();
     294    std::vector<Value *> init_args = {mKernelInstance};
     295    for (auto a : mInitialArguments) {
     296        init_args.push_back(a);
     297    }
     298    for (auto b : mStreamSetInputBuffers) {
     299        init_args.push_back(b->getStreamSetBasePtr());
     300    }
     301    for (auto b : mStreamSetOutputBuffers) {
     302        init_args.push_back(b->getStreamSetBasePtr());
     303    }
     304    std::string initFnName = mKernelName + init_suffix;
     305    Function * initMethod = m->getFunction(initFnName);
     306    if (initMethod == nullptr) {
     307        llvm::report_fatal_error("Cannot find " + initFnName);
     308    }
     309    iBuilder->CreateCall(initMethod, init_args);
     310}
     311
    151312//  The default finalBlock method simply dispatches to the doBlock routine.
    152 void BlockOrientedKernel::generateFinalBlockMethod() const {
    153     auto savePoint = iBuilder->saveIP();
    154     Module * m = iBuilder->getModule();
    155     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    156     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    157     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    158     // Final Block arguments: self, remaining, then the standard DoBlock args.
    159     Function::arg_iterator args = finalBlockFunction->arg_begin();
    160     Value * self = &*(args++);
    161     /* Skip "remaining" arg */ args++;
    162     std::vector<Value *> doBlockArgs = {self};
    163     iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    164     iBuilder->CreateRetVoid();
    165     iBuilder->restoreIP(savePoint);
    166 }
    167 
    168 // Note: this may be overridden to incorporate doBlock logic directly into
    169 // the doSegment function.
    170 void BlockOrientedKernel::generateDoBlockLogic(Value * self, Value * /* blockNo */) const {
    171     Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
    172     iBuilder->CreateCall(doBlockFunction, self);
     313void BlockOrientedKernel::generateFinalBlockMethod(Function * function, Value * self, Value * /* remainingBytes */, Value * /* blockNo */) const {
     314//    std::vector<Value *> args = {self};
     315//    for (Argument & arg : function->getArgumentList()){
     316//        args.push_back(&arg);
     317//    }
     318    iBuilder->CreateCall(getDoBlockFunction(), { self });
    173319}
    174320
     
    176322//  each block of the given number of blocksToDo, and then updates counts.
    177323void BlockOrientedKernel::generateDoSegmentMethod() const {
    178     generateDoBlockMethod();    // must be implemented by the KernelBuilder subtype
    179     generateFinalBlockMethod(); // possibly overridden by the KernelBuilder subtype
    180324    auto savePoint = iBuilder->saveIP();
     325
     326    callGenerateDoBlockMethod();
     327
     328    callGenerateDoFinalBlockMethod();
     329
    181330    Module * m = iBuilder->getModule();
    182331    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
     
    189338    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_segmentDone", doSegmentFunction, 0);
    190339    Type * const size_ty = iBuilder->getSizeTy();
    191     Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
    192     Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
     340
     341    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
     342    ConstantInt * strideBlocks = iBuilder->getSize(iBuilder->getStride() / iBuilder->getBitBlockWidth());
    193343
    194344    Function::arg_iterator args = doSegmentFunction->arg_begin();
     
    216366
    217367    iBuilder->SetInsertPoint(strideLoopBody);
    218     Value * blockNo = getScalarField(self, blockNoScalar);
    219 
    220     generateDoBlockLogic(self, blockNo);
     368    Value * blockNo = getBlockNo(self);
     369
     370    iBuilder->CreateCall(getDoBlockFunction(), self);
     371
    221372    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
    222373    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody);
     
    233384        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    234385            Value * preProduced = getProducedItemCount(self, mStreamSetOutputs[i].name);
    235 
    236386            setProducedItemCount(self, mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, segmentItemsProcessed));
    237             //iBuilder->CallPrintInt(mKernelName + " produced ", iBuilder->CreateAdd(preProduced, segmentItemsProcessed));
    238387        }
    239388    }
     
    244393
    245394    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(self, mStreamSetInputs[0].name));
    246     //iBuilder->CallPrintInt(mKernelName + " remainingItems", remainingItems);
    247 
    248     createFinalBlockCall(self, remainingItems);
     395
     396    iBuilder->CreateCall(getDoFinalBlockFunction(), {self, remainingItems});
     397
     398    // createFinalBlockCall(self, remainingItems);
    249399    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    250400        Value * preProcessed = getProcessedItemCount(self, mStreamSetInputs[i].name);
     
    266416}
    267417
    268 
    269 ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
    270     const auto f = mKernelMap.find(name);
    271     if (LLVM_UNLIKELY(f == mKernelMap.end())) {
    272         llvm::report_fatal_error("Kernel does not contain scalar: " + name);
    273     }
    274     return iBuilder->getInt32(f->second);
    275 }
    276 
    277 unsigned KernelBuilder::getScalarCount() const {
    278     return mKernelFields.size();
    279 }
    280 
    281 Value * KernelBuilder::getScalarFieldPtr(Value * self, const std::string & fieldName) const {
    282     return getScalarFieldPtr(self, getScalarIndex(fieldName));
    283 }
    284 
    285 Value * KernelBuilder::getScalarFieldPtr(Value * self, Value * index) const {
    286     return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), index});
    287 }
    288 
    289 Value * KernelBuilder::getScalarField(Value * self, const std::string & fieldName) const {
    290     return iBuilder->CreateLoad(getScalarFieldPtr(self, fieldName));
    291 }
    292 
    293 Value * KernelBuilder::getScalarField(Value * self, Value * index) const {
    294     return iBuilder->CreateLoad(getScalarFieldPtr(self, index));
    295 }
    296 
    297 void KernelBuilder::setScalarField(Value * self, const std::string & fieldName, Value * value) const {
    298     iBuilder->CreateStore(value, getScalarFieldPtr(self, fieldName));
    299 }
    300 
    301 void KernelBuilder::setScalarField(Value * self, Value * index, Value * value) const {
    302     iBuilder->CreateStore(value, getScalarFieldPtr(self, index));
    303 }
    304 
    305 LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * self) const {
    306     Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
    307     return iBuilder->CreateAtomicLoadAcquire(ptr);
    308 }
    309 
    310 Value * KernelBuilder::getProcessedItemCount(Value * self, const std::string & ssName) const {
    311     return getScalarField(self, ssName + processedItemCountSuffix);
    312 }
    313 
    314 Value * KernelBuilder::getProducedItemCount(Value * self, const std::string & ssName) const {
    315     return getScalarField(self, ssName + producedItemCountSuffix);
    316 }
    317 
    318 Value * KernelBuilder::getTerminationSignal(Value * self) const {
    319     return getScalarField(self, terminationSignal);
    320 }
    321 
    322 void KernelBuilder::releaseLogicalSegmentNo(Value * self, Value * newCount) const {
    323     Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
    324     iBuilder->CreateAtomicStoreRelease(newCount, ptr);
    325 }
    326 
    327 void KernelBuilder::setProcessedItemCount(Value * self, const std::string & name, Value * value) const {
    328     Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(name + processedItemCountSuffix)});
    329     iBuilder->CreateStore(value, ptr);
    330 }
    331 
    332 void KernelBuilder::setProducedItemCount(Value * self, const std::string & name, Value * value) const {
    333     Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(name + producedItemCountSuffix)});
    334     iBuilder->CreateStore(value, ptr);
    335 }
    336 
    337 void KernelBuilder::setTerminationSignal(Value * self) const {
    338     Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
    339     iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
    340 }
    341 
    342 Value * KernelBuilder::getBlockNo(Value * self) const {
    343     Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
    344     return iBuilder->CreateLoad(ptr);
    345 }
    346 
    347 void KernelBuilder::setBlockNo(Value * self, Value * value) const {
    348     Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
    349     iBuilder->CreateStore(value, ptr);
    350 }
    351 
    352 
    353 Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
    354     for (auto & arg : f->getArgumentList()) {
    355         if (arg.getName().equals(name)) {
    356             return &arg;
    357         }
    358     }
    359     llvm::report_fatal_error("Method does not have parameter: " + name);
    360 }
    361 
    362 unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
    363     const auto f = mStreamSetNameMap.find(name);
    364     if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
    365         llvm::report_fatal_error("Kernel " + getName() + " does not contain stream set: " + name);
    366     }
    367     return f->second;
    368 }
    369 
    370 Value * KernelBuilder::getStreamSetBufferPtr(Value * self, const std::string & name) const {
    371     return getScalarField(self, name + bufferPtrSuffix);
    372 }
    373 
    374 inline const StreamSetBuffer * KernelBuilder::getStreamSetBuffer(const std::string & name) const {
    375     const unsigned structIdx = getStreamSetIndex(name);
    376     if (structIdx < mStreamSetInputs.size()) {
    377         return mStreamSetInputBuffers[structIdx];
    378     } else {
    379         return mStreamSetOutputBuffers[structIdx - mStreamSetInputs.size()];
    380     }
    381 }
    382 
    383 Value * KernelBuilder::getStreamSetPtr(Value * self, const std::string & name, Value * blockNo) const {
    384     return getStreamSetBuffer(name)->getStreamSetPtr(getStreamSetBufferPtr(self, name), blockNo);
    385 }
    386 
    387 Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index) const {
    388     return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(self, name), blockNo, index);
    389 }
    390 
    391 Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index1, Value * index2) const {
    392     assert (index1->getType() == index2->getType());
    393     return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(self, name), blockNo, index1, index2);
    394 }
    395 
    396 Value * KernelBuilder::getStreamView(Value * self, const std::string & name, Value * blockNo, Value * index) const {
    397     return getStreamSetBuffer(name)->getStreamView(getStreamSetBufferPtr(self, name), blockNo, index);
    398 }
    399 
    400 Value * KernelBuilder::getStreamView(llvm::Type * type, Value * self, const std::string & name, Value * blockNo, Value * index) const {
    401     return getStreamSetBuffer(name)->getStreamView(type, getStreamSetBufferPtr(self, name), blockNo, index);
    402 }
    403 
    404 void KernelBuilder::createInstance() {
    405     if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    406         llvm::report_fatal_error("Cannot create kernel instance before calling prepareKernel()");
    407     }
    408     mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
    409     Module * m = iBuilder->getModule();
    410     std::vector<Value *> init_args = {mKernelInstance};
    411     for (auto a : mInitialArguments) {
    412         init_args.push_back(a);
    413     }
    414     for (auto b : mStreamSetInputBuffers) {
    415         init_args.push_back(b->getStreamSetBasePtr());
    416     }
    417     for (auto b : mStreamSetOutputBuffers) {
    418         init_args.push_back(b->getStreamSetBasePtr());
    419     }
    420     std::string initFnName = mKernelName + init_suffix;
    421     Function * initMethod = m->getFunction(initFnName);
    422     if (!initMethod) {
    423         llvm::report_fatal_error("Cannot find " + initFnName);
    424     }
    425     iBuilder->CreateCall(initMethod, init_args);
    426 }
    427 
     418void BlockOrientedKernel::callGenerateDoBlockMethod() const {
     419    Function * f = getDoBlockFunction();
     420    Value * const self = getParameter(f, "self"); assert (self);
     421    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", f));
     422    generateDoBlockMethod(f, self, getBlockNo(self)); // must be implemented by the KernelBuilder subtype
     423    iBuilder->CreateRetVoid();
     424//    #ifndef NDEBUG
     425//    llvm::verifyFunction(*f, &errs());
     426//    #endif
     427}
     428
     429void BlockOrientedKernel::callGenerateDoFinalBlockMethod() const {
     430    Function * f = getDoFinalBlockFunction();
     431    Value * const self = getParameter(f, "self"); assert (self);
     432    Value * remainingBytes = getParameter(f, "remainingBytes"); assert (remainingBytes);
     433    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", f));
     434    generateFinalBlockMethod(f, self, remainingBytes, getBlockNo(self)); // possibly overridden by the KernelBuilder subtype
     435    iBuilder->CreateRetVoid();
     436//    #ifndef NDEBUG
     437//    llvm::verifyFunction(*f, &errs());
     438//    #endif
     439}
     440
     441Function * BlockOrientedKernel::getDoBlockFunction() const {
     442    return iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
     443}
     444
     445Function * BlockOrientedKernel::getDoFinalBlockFunction() const {
     446    return iBuilder->getModule()->getFunction(mKernelName + finalBlock_suffix);
     447}
     448
     449// CONSTRUCTOR
    428450KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
    429451                             std::string && kernelName,
     
    439461}
    440462
    441 KernelBuilder::~KernelBuilder() {
    442 
    443 }
    444 
     463KernelBuilder::~KernelBuilder() { }
     464
     465// CONSTRUCTOR
    445466BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
    446467                                         std::string && kernelName,
     
    454475}
    455476
     477// CONSTRUCTOR
    456478SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
    457479                                             std::string && kernelName,
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5283 r5285  
    2020namespace parabix { class StreamSetBuffer; }
    2121
    22 const std::string blockNoScalar = "blockNo";
    2322const std::string logicalSegmentNoScalar = "logicalSegNo";
    2423const std::string processedItemCountSuffix = "_processedItemCount";
     
    198197    // Each kernel builder subtype must provide its own logic for generating
    199198    // doBlock calls.
    200     virtual void generateDoBlockMethod() const = 0;
    201 
    202     virtual void generateDoBlockLogic(llvm::Value * self, llvm::Value * blockNo) const;
     199    virtual void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const = 0;
    203200
    204201    // Each kernel builder subtype must also specify the logic for processing the
     
    209206    // not be overridden.
    210207
    211     virtual void generateFinalBlockMethod() const;
     208    virtual void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const;
    212209
    213210    virtual void generateDoSegmentMethod() const final;
     
    222219
    223220    virtual ~BlockOrientedKernel() { }
     221
     222    llvm::Function * getDoBlockFunction() const;
     223
     224    llvm::Function * getDoFinalBlockFunction() const;
     225
     226private:
     227    void callGenerateDoBlockMethod() const;
     228
     229    void callGenerateDoFinalBlockMethod() const;
    224230};
    225231
    226232class SegmentOrientedKernel : public KernelBuilder {
    227233protected:
     234
    228235    SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
    229236                          std::string && kernelName,
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.h

    r5283 r5285  
    1717class MMapSourceKernel : public SegmentOrientedKernel {
    1818public:
    19     MMapSourceKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
    20    
     19    MMapSourceKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8); 
    2120private:
    22 
    2321    void generateDoSegmentMethod() const override;
    24 
    2522private:
    2623    const unsigned mSegmentBlocks;
    2724    const unsigned mCodeUnitWidth;
    28    
    2925};
    3026}
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5283 r5285  
    2929    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
    3030    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
    31 
    3231    Value * bit00224466[4];
    3332    Value * bit11335577[4];
     
    4140}
    4241               
    43 void P2SKernel::generateDoBlockMethod() const {
    44     auto savePoint = iBuilder->saveIP();
    45     Module * m = iBuilder->getModule();
    46    
    47     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    48    
    49     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    50    
    51     Value * self = getParameter(doBlockFunction, "self");
    52     Value * blockNo = getScalarField(self, blockNoScalar);
     42void P2SKernel::generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const {
    5343    Value * p_bitblock[8];
    5444    for (unsigned i = 0; i < 8; i++) {
     
    6252        iBuilder->CreateBlockAlignedStore(s_bytepack[j], ptr);
    6353    }
    64     iBuilder->CreateRetVoid();
    65     iBuilder->restoreIP(savePoint);
    6654}
    6755
     
    7563   
    7664
    77 void P2SKernelWithCompressedOutput::generateDoBlockMethod() const {
    78     auto savePoint = iBuilder->saveIP();
    79     Module * m = iBuilder->getModule();
    80     Type * i8PtrTy = iBuilder->getInt8PtrTy();
    81     Type * i32 = iBuilder->getIntNTy(32);
    82     Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0);
    83 
    84     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    85 
    86     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    87     Value * self = getParameter(doBlockFunction, "self");
    88     Value * blockNo = getScalarField(self, blockNoScalar);
    89 
    90 
     65void P2SKernelWithCompressedOutput::generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const {
     66    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
     67    IntegerType * i32 = iBuilder->getInt32Ty();
     68    PointerType * bitBlockPtrTy = PointerType::get(iBuilder->getBitBlockType(), 0);
    9169
    9270    Value * basisBits[8];
     
    11189    unitsGenerated = iBuilder->CreateAdd(unitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    11290    setProducedItemCount(self, "byteStream", unitsGenerated);
    113 
    114     iBuilder->CreateRetVoid();
    115     iBuilder->restoreIP(savePoint);
    11691}
    11792   
     
    126101   
    127102
    128 void P2S16Kernel::generateDoBlockMethod() const {
    129     auto savePoint = iBuilder->saveIP();
    130     Module * m = iBuilder->getModule();
    131    
    132     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    133    
    134     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    135     Value * self = getParameter(doBlockFunction, "self");
    136     Value * blockNo = getScalarField(self, blockNoScalar);
    137 
     103void P2S16Kernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    138104    Value * hi_input[8];
    139105    for (unsigned j = 0; j < 8; ++j) {
     
    142108    }
    143109    Value * hi_bytes[8];
    144     p2s(iBuilder, hi_input, hi_bytes);
    145    
     110    p2s(iBuilder, hi_input, hi_bytes);   
    146111    Value * lo_input[8];
    147112    for (unsigned j = 0; j < 8; ++j) {
     
    150115    }
    151116    Value * lo_bytes[8];
    152     p2s(iBuilder, lo_input, lo_bytes);
    153    
     117    p2s(iBuilder, lo_input, lo_bytes);   
    154118    for (unsigned j = 0; j < 8; ++j) {
    155119        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
     
    160124        iBuilder->CreateBlockAlignedStore(merge1, ptr1);
    161125    }
    162     iBuilder->CreateRetVoid();
    163     iBuilder->restoreIP(savePoint);
    164126}
    165127   
     
    174136
    175137   
    176 void P2S16KernelWithCompressedOutput::generateDoBlockMethod() const {
    177     auto savePoint = iBuilder->saveIP();
    178     Module * m = iBuilder->getModule();
    179     Type * i32Ty = iBuilder->getInt32Ty();
    180     Type * bitBlockPtrTy = iBuilder->getBitBlockType()->getPointerTo();
    181 
    182     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    183 
    184     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    185 
    186     Value * self = getParameter(doBlockFunction, "self");
    187     Value * blockNo = getScalarField(self, blockNoScalar);
    188 
     138void P2S16KernelWithCompressedOutput::generateDoBlockMethod(Function * function, Value *self, Value *blockNo) const {
     139    IntegerType * i32Ty = iBuilder->getInt32Ty();
     140    PointerType * bitBlockPtrTy = iBuilder->getBitBlockType()->getPointerTo();
    189141    Value * hi_input[8];
    190142    for (unsigned j = 0; j < 8; ++j) {
     
    194146    Value * hi_bytes[8];
    195147    p2s(iBuilder, hi_input, hi_bytes);
    196 
    197148    Value * lo_input[8];
    198149    for (unsigned j = 0; j < 8; ++j) {
     
    202153    Value * lo_bytes[8];
    203154    p2s(iBuilder, lo_input, lo_bytes);
    204 
    205155    Value * delCountBlock_ptr = getStream(self, "deletionCounts", blockNo, iBuilder->getInt32(0));
    206156    Value * unit_counts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
    207 
    208157    PointerType * int16PtrTy = PointerType::get(iBuilder->getInt16Ty(), 0);
    209158    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
     
    222171    i16UnitsGenerated = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    223172    setProducedItemCount(self, "i16Stream", i16UnitsGenerated);
    224     iBuilder->CreateRetVoid();
    225     iBuilder->restoreIP(savePoint);
    226 }
    227 
    228 void P2S16KernelWithCompressedOutput::generateFinalBlockMethod() const {
    229     auto savePoint = iBuilder->saveIP();
    230     Module * m = iBuilder->getModule();
    231     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    232     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    233     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    234     Function::arg_iterator args = finalBlockFunction->arg_begin();
    235     Value * self = &*(args++);
    236     std::vector<Value *> doBlockArgs = {self};
    237     iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    238     iBuilder->CreateRetVoid();
    239     iBuilder->restoreIP(savePoint);
    240173}
    241174   
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r5283 r5285  
    1515public:
    1616    P2SKernel(IDISA::IDISA_Builder * iBuilder);
    17    
    1817private:
    19     void generateDoBlockMethod() const override;
    20    
     18    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    2119};
    2220
     
    2523    P2SKernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder);   
    2624private:
    27     void generateDoBlockMethod() const override;
     25    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    2826};
    29    
    3027
    3128class P2S16Kernel : public BlockOrientedKernel {
     
    3330    P2S16Kernel(IDISA::IDISA_Builder * iBuilder);   
    3431private:
    35     void generateDoBlockMethod() const override;
    36    
     32    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    3733};
    38 
    3934   
    4035class P2S16KernelWithCompressedOutput : public BlockOrientedKernel {
    4136public:
    4237    P2S16KernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder);
    43        
    4438private:
    45     void generateDoBlockMethod() const override;
    46     void generateFinalBlockMethod() const override;
     39    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    4740};
    4841   
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5283 r5285  
    107107    Value * loopItemsToDo = iBuilder->CreateSub(itemsAvail, excessItems);
    108108
    109     Value * blockNo = getScalarField(self, blockNoScalar);
     109    Value * blockNo = getBlockNo(self);
    110110
    111111    // A block is made up of 8 packs.  Get the pointer to the first pack (changes the type of the pointer only).
     
    173173    setProcessedItemCount(self, "sourceStream", processed);
    174174   
    175     setScalarField(self, blockNoScalar, iBuilder->CreateUDiv(processed, stride));
     175    setBlockNo(self, iBuilder->CreateUDiv(processed, stride));
    176176    // We have produced 4 output bytes for every 3 input bytes.
    177177    Value * totalProduced = iBuilder->CreateMul(iBuilder->CreateUDiv(processed, Const3), Const4);
     
    247247    setProcessedItemCount(self, "sourceStream", processed);
    248248
    249     setScalarField(self, blockNoScalar, iBuilder->CreateUDiv(processed, stride));
     249    setBlockNo(self, iBuilder->CreateUDiv(processed, stride));
    250250    // We have produced 4 output bytes for every 3 input bytes.  If the number of input
    251251    // bytes is not a multiple of 3, then we have one more output byte for each excess
     
    271271//                                   ba    bits to move 12 positions left
    272272//    xwvuts|  nlkjzy|  barqpm|  hgfedc    Target
    273 void radix64Kernel::generateDoBlockLogic(Value * self, Value * blockNo) const {
    274 
     273void radix64Kernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    275274    Value * step_right_6 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00C00000));
    276275    Value * step_left_8 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x003F0000));
     
    279278    Value * step_right_2 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x000000FC));
    280279    Value * step_left_12 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000003));
    281    
    282280    for (unsigned i = 0; i < 8; i++) {
    283281        Value * expandedStream = getStream(self, "expandedStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
    284282        Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStream);
    285 
    286283        Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
    287284        Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
     285        Value * mid = iBuilder->simd_or(right_6_result, right_4_result);
    288286        Value * right_2_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_2), 2);
     287        mid = iBuilder->simd_or(mid, right_2_result);
    289288        Value * left_8_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_8), 8);
     289        mid = iBuilder->simd_or(mid, left_8_result);
    290290        Value * left_10_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_10), 10);
     291        mid = iBuilder->simd_or(mid, left_10_result);
    291292        Value * left_12_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_12), 12);
    292 
    293         Value * mid = right_6_result;
    294         mid = iBuilder->simd_or(mid, right_4_result);
    295         mid = iBuilder->simd_or(mid, right_2_result);
    296         mid = iBuilder->simd_or(mid, left_8_result);
    297         mid = iBuilder->simd_or(mid, left_10_result);
    298293        mid = iBuilder->simd_or(mid, left_12_result);
    299294        Value * radix64pack = iBuilder->bitCast(mid);
    300 
    301295        Value * radix64stream = getStream(self, "radix64stream",blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
    302296        iBuilder->CreateBlockAlignedStore(radix64pack, radix64stream);
     
    304298    Value * produced = getProducedItemCount(self, "radix64stream");
    305299    produced = iBuilder->CreateAdd(produced, iBuilder->getSize(iBuilder->getStride()));
    306     setProducedItemCount(self, "radix64stream", produced);   
    307 }
    308 
    309 void radix64Kernel::generateFinalBlockMethod() const {
    310     auto savePoint = iBuilder->saveIP();
    311     Module * m = iBuilder->getModule();
    312     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    313     BasicBlock * radix64_fb_entry = BasicBlock::Create(iBuilder->getContext(), "radix64_fb_entry", finalBlockFunction, 0);
    314     iBuilder->SetInsertPoint(radix64_fb_entry);
    315     BasicBlock * radix64_loop = BasicBlock::Create(iBuilder->getContext(), "radix64_loop", finalBlockFunction, 0);
    316     BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", finalBlockFunction, 0);
    317     BasicBlock * handleRemainFirstByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainFirstByte", finalBlockFunction, 0);
    318     BasicBlock * handleRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainSecondByte", finalBlockFunction, 0);
    319     BasicBlock * handleNoRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleNoRemainSecondByte", finalBlockFunction, 0);
    320     BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", finalBlockFunction, 0);
     300    setProducedItemCount(self, "radix64stream", produced);
     301}
     302
     303void radix64Kernel::generateFinalBlockMethod(Function * function, Value *self, Value * remainingBytes, Value * blockNo) const {
     304
     305    BasicBlock * entry = iBuilder->GetInsertBlock();
     306    BasicBlock * radix64_loop = BasicBlock::Create(iBuilder->getContext(), "radix64_loop", function, 0);
     307    BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", function, 0);
     308    BasicBlock * handleRemainFirstByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainFirstByte", function, 0);
     309    BasicBlock * handleRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainSecondByte", function, 0);
     310    BasicBlock * handleNoRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleNoRemainSecondByte", function, 0);
     311    BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", function, 0);
    321312    // Final Block arguments: self, remaining.
    322     Function::arg_iterator args = finalBlockFunction->arg_begin();
    323     Value * self = &*(args++);
    324     Value * remainingBytes = &*(args++);
    325313    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, iBuilder->getSize(3));
    326314
    327315    const unsigned PACK_SIZE = iBuilder->getStride()/8;
    328316    Constant * packSize = iBuilder->getSize(PACK_SIZE);
    329     Value * blockNo = getScalarField(self, blockNoScalar);
    330317
    331318    Value * step_right_6 = iBuilder->simd_fill(32, iBuilder->getInt32(0x00C00000));
     
    336323    Value * step_left_12 = iBuilder->simd_fill(32, iBuilder->getInt32(0x00000003));
    337324
    338 
    339325    // Enter the loop only if there is at least one byte remaining to process.
    340326    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0)), fbExit, radix64_loop);
     
    343329    PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    344330    PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    345     idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), radix64_fb_entry);
    346     loopRemain->addIncoming(remainingBytes, radix64_fb_entry);
     331    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
     332    loopRemain->addIncoming(remainingBytes, entry);
    347333
    348334    Value * expandedStreamLoopPtr = getStream(self, "expandedStream", blockNo, iBuilder->getInt32(0), idx);
     
    426412    Value * produced = iBuilder->CreateAdd(getProducedItemCount(self, "radix64stream"), iBuilder->CreateAdd(remainingBytes, outputNumberAdd));
    427413    setProducedItemCount(self, "radix64stream", produced);
    428 
    429     iBuilder->CreateRetVoid();
    430     iBuilder->restoreIP(savePoint);
    431 }
    432 
    433 void radix64Kernel::generateDoBlockMethod() const {
    434     auto savePoint = iBuilder->saveIP();
    435 
    436     Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
    437 
    438     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    439 
    440     Value * self = getParameter(doBlockFunction, "self");
    441     Value * blockNo = getScalarField(self, blockNoScalar);
    442 
    443     generateDoBlockLogic(self, blockNo);
    444 
    445     iBuilder->CreateRetVoid();
    446     iBuilder->restoreIP(savePoint);
    447 }
    448 
    449 void base64Kernel::generateDoBlockLogic(Value * self, Value * blockNo) const {       
     414}
     415
     416void base64Kernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    450417    for (unsigned i = 0; i < 8; i++) {
    451418        Value * radix64stream_ptr = getStream(self, "radix64stream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     
    474441}
    475442
    476 
    477443// Special processing for the base 64 format.   The output must always contain a multiple
    478444// of 4 bytes.   When the number of radix 64 values is not a multiple of 4,
    479445// padding bytes are added to round the output up to the next multiple of 4.
    480 void base64Kernel::generateFinalBlockMethod() const {
    481     auto savePoint = iBuilder->saveIP();
    482     Module * m = iBuilder->getModule();
    483     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    484     BasicBlock * base64_fb_entry = BasicBlock::Create(iBuilder->getContext(), "base64_fb_entry", finalBlockFunction, 0);
    485     iBuilder->SetInsertPoint(base64_fb_entry);
    486     BasicBlock * base64_loop = BasicBlock::Create(iBuilder->getContext(), "base64_loop", finalBlockFunction, 0);
    487     BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", finalBlockFunction, 0);
    488     BasicBlock * doPadding = BasicBlock::Create(iBuilder->getContext(), "doPadding", finalBlockFunction, 0);
    489     BasicBlock * doPadding2 = BasicBlock::Create(iBuilder->getContext(), "doPadding2", finalBlockFunction, 0);
    490     BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", finalBlockFunction, 0);
    491     // Final Block arguments: self, remaining.
    492     Function::arg_iterator args = finalBlockFunction->arg_begin();
    493     Value * self = &*(args++);
    494     Value * remainingBytes = &*(args++);
     446void base64Kernel::generateFinalBlockMethod(Function * function, Value * self, Value * remainingBytes, Value * blockNo) const {
     447
     448    BasicBlock * entry = iBuilder->GetInsertBlock();
     449    BasicBlock * base64_loop = BasicBlock::Create(iBuilder->getContext(), "base64_loop", function, 0);
     450    BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", function, 0);
     451    BasicBlock * doPadding = BasicBlock::Create(iBuilder->getContext(), "doPadding", function, 0);
     452    BasicBlock * doPadding2 = BasicBlock::Create(iBuilder->getContext(), "doPadding2", function, 0);
     453    BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", function, 0);
     454
    495455    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, iBuilder->getSize(3));
    496456    Value * padBytes = iBuilder->CreateSub(iBuilder->getSize(4), remainMod4);
     
    498458
    499459    Constant * packSize = iBuilder->getSize(iBuilder->getStride() / 8);
    500     Value * blockNo = getScalarField(self, blockNoScalar);
    501460
    502461    // Enter the loop only if there is at least one byte remaining to process.
    503462    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0)), fbExit, base64_loop);
    504    
     463
    505464    iBuilder->SetInsertPoint(base64_loop);
    506465    PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    507466    PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    508     idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), base64_fb_entry);
    509     loopRemain->addIncoming(remainingBytes, base64_fb_entry);
     467    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
     468    loopRemain->addIncoming(remainingBytes, entry);
    510469    Value * radix64streamPtr = getStream(self, "radix64stream", blockNo, iBuilder->getInt32(0), idx);
    511470    Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64streamPtr);
     
    542501    Value * produced = iBuilder->CreateAdd(getProducedItemCount(self, "base64stream"), iBuilder->CreateAdd(remainingBytes, padBytes));
    543502    setProducedItemCount(self, "base64stream", produced);
    544     iBuilder->CreateRetVoid();
    545     iBuilder->restoreIP(savePoint);
    546 }
    547 
    548 void base64Kernel::generateDoBlockMethod() const {
    549     auto savePoint = iBuilder->saveIP();
    550 
    551     Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
    552 
    553     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    554 
    555     Value * self = getParameter(doBlockFunction, "self");
    556     Value * blockNo = getScalarField(self, blockNoScalar);
    557 
    558     generateDoBlockLogic(self, blockNo);
    559 
    560     iBuilder->CreateRetVoid();
    561     iBuilder->restoreIP(savePoint);
    562503}
    563504
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5283 r5285  
    2020 
    2121class expand3_4Kernel : public SegmentOrientedKernel {
    22 public:
    23    
     22public:   
    2423    expand3_4Kernel(IDISA::IDISA_Builder * iBuilder);
    25    
    2624private:
    2725    void generateDoSegmentMethod() const override;
    28    
    2926};
    3027
    3128class radix64Kernel : public BlockOrientedKernel {
    3229public:
    33    
    3430    radix64Kernel(IDISA::IDISA_Builder * iBuilder);
    3531private:
    36     virtual void generateDoBlockLogic(llvm::Value * self, llvm::Value * blockNo) const override;
    37     virtual void generateDoBlockMethod() const override;
    38     virtual void generateFinalBlockMethod() const override;
     32    virtual void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
     33    virtual void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const override;
    3934};
    4035
    4136class base64Kernel : public BlockOrientedKernel {
    4237public:
    43    
    44     base64Kernel(IDISA::IDISA_Builder * iBuilder);   
     38    base64Kernel(IDISA::IDISA_Builder * iBuilder);
    4539private:
    46     virtual void generateDoBlockLogic(llvm::Value * self, llvm::Value * blockNo) const override;
    47     virtual void generateFinalBlockMethod() const override;
    48     virtual void generateDoBlockMethod() const override;
     40    virtual void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const override;
     41    virtual void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    4942   
    5043};
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5283 r5285  
    9292#endif
    9393   
    94    
    9594#if 0
    96 
    97 
    9895void generateS2P_16Kernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    9996    kBuilder->addInputStream(16, "unit_pack");
     
    121118    }
    122119    kBuilder->finalize();
    123 }
    124    
     120}   
    125121#endif
    126122   
    127 void S2PKernel::generateFinalBlockMethod() const {
    128     /* Prepare the s2p final block function:
    129      assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
    130      if remaining bytes is zero, no read should be performed (e.g. for mmapped buffer).
    131      */
    132     auto savePoint = iBuilder->saveIP();
    133     Module * m = iBuilder->getModule();
    134     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    135     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    136     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    137    
    138     Value * self = getParameter(finalBlockFunction, "self");
    139     Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
    140    
    141     BasicBlock * finalPartialBlock = BasicBlock::Create(iBuilder->getContext(), "partial", finalBlockFunction, 0);
    142     BasicBlock * finalEmptyBlock = BasicBlock::Create(iBuilder->getContext(), "empty", finalBlockFunction, 0);
    143     BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exit", finalBlockFunction, 0);
    144    
    145     Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0));
    146     iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
    147     iBuilder->SetInsertPoint(finalPartialBlock);
    148     iBuilder->CreateCall(doBlockFunction, {self});
    149    
    150     iBuilder->CreateBr(exitBlock);
    151    
    152     iBuilder->SetInsertPoint(finalEmptyBlock);
    153     Value * blockNo = getScalarField(self, blockNoScalar);
    154     Value * basisBitsPtr = getStreamView(self, "basisBits", blockNo, iBuilder->getInt64(0));
    155     iBuilder->CreateStore(Constant::getNullValue(basisBitsPtr->getType()->getPointerElementType()), basisBitsPtr);
    156     iBuilder->CreateBr(exitBlock);
    157    
    158     iBuilder->SetInsertPoint(exitBlock);
    159     iBuilder->CreateRetVoid();
    160     iBuilder->restoreIP(savePoint);
    161 }
    162    
    163 void S2PKernel::generateDoBlockLogic(Value * self, Value * blockNo) const {
     123void S2PKernel::generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const {
    164124    Value * bytepack[8];
    165125    for (unsigned i = 0; i < 8; i++) {
     
    174134    }
    175135}
     136
     137void S2PKernel::generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, Value * remainingBytes, llvm::Value * blockNo) const {
     138    /* Prepare the s2p final block function:
     139     assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
     140     if remaining bytes is zero, no read should be performed (e.g. for mmapped buffer).
     141     */
    176142   
    177 void S2PKernel::generateDoBlockMethod() const {
    178     auto savePoint = iBuilder->saveIP();
    179 
    180     Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
     143    BasicBlock * finalPartialBlock = BasicBlock::Create(iBuilder->getContext(), "partial", function, 0);
     144    BasicBlock * finalEmptyBlock = BasicBlock::Create(iBuilder->getContext(), "empty", function, 0);
     145    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exit", function, 0);
    181146   
    182     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction));
     147    Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0));
     148    iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
     149    iBuilder->SetInsertPoint(finalPartialBlock);
     150    iBuilder->CreateCall(getDoBlockFunction(), {self});
    183151   
    184     Value * self = getParameter(doBlockFunction, "self");
    185     Value * blockNo = getScalarField(self, blockNoScalar);
     152    iBuilder->CreateBr(exitBlock);
    186153   
    187     generateDoBlockLogic(self, blockNo);
    188 
    189     iBuilder->CreateRetVoid();
    190     iBuilder->restoreIP(savePoint);
     154    iBuilder->SetInsertPoint(finalEmptyBlock);
     155    Value * basisBitsPtr = getStreamView(self, "basisBits", blockNo, iBuilder->getInt64(0));
     156    iBuilder->CreateStore(Constant::getNullValue(basisBitsPtr->getType()->getPointerElementType()), basisBitsPtr);
     157    iBuilder->CreateBr(exitBlock);
     158   
     159    iBuilder->SetInsertPoint(exitBlock);
    191160}
    192161
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r5283 r5285  
    1313
    1414class S2PKernel : public BlockOrientedKernel {
    15 public:
    16    
     15public:   
    1716    S2PKernel(IDISA::IDISA_Builder * builder);
    18 
    1917    virtual ~S2PKernel() {}
    20        
    2118private:
    22     void generateDoBlockLogic(llvm::Value * self, llvm::Value * blockNo) const override;
    23     void generateDoBlockMethod() const override;
    24     void generateFinalBlockMethod() const override;
    25    
     19    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
     20    void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const override;   
    2621};
    2722
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5267 r5285  
    3737    return iBuilder->CreateAnd(bits_minus1, bits);
    3838}
    39 
    4039       
    41 void ScanMatchKernel::generateDoBlockMethod() const {
     40void ScanMatchKernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
     41
    4242    auto savePoint = iBuilder->saveIP();
    43     Module * m = iBuilder->getModule();
    44     Function * scanWordFunction = generateScanWordRoutine(m);
     43    Function * scanWordFunction = generateScanWordRoutine(iBuilder->getModule());
     44    iBuilder->restoreIP(savePoint);
     45
    4546    IntegerType * T = iBuilder->getSizeTy();
    4647    const unsigned fieldCount = iBuilder->getBitBlockWidth() / T->getBitWidth();
    47 
    4848    Type * scanwordVectorType =  VectorType::get(T, fieldCount);
    49 
    50     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    51 
    52     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    53     Value * kernelStuctParam = getParameter(doBlockFunction, "self");
    54     Value * blockNo = getScalarField(kernelStuctParam, blockNoScalar);
    55     Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
    56    
    57     Value * recordStart = getScalarField(kernelStuctParam, "LineStart");
    58     Value * recordNum = getScalarField(kernelStuctParam, "LineNum");
    59     Value * matches = iBuilder->CreateBlockAlignedLoad(getStream(kernelStuctParam, "matchResults", blockNo, iBuilder->getInt32(0)));
    60     Value * linebreaks = iBuilder->CreateBlockAlignedLoad(getStream(kernelStuctParam, "matchResults", blockNo, iBuilder->getInt32(1)));
     49    Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));   
     50    Value * recordStart = getScalarField(self, "LineStart");
     51    Value * recordNum = getScalarField(self, "LineNum");
     52    Value * matches = iBuilder->CreateBlockAlignedLoad(getStream(self, "matchResults", blockNo, iBuilder->getInt32(0)));
     53    Value * linebreaks = iBuilder->CreateBlockAlignedLoad(getStream(self, "matchResults", blockNo, iBuilder->getInt32(1)));
    6154    Value * matchWordVector = iBuilder->CreateBitCast(matches, scanwordVectorType);
    6255    Value * breakWordVector = iBuilder->CreateBitCast(linebreaks, scanwordVectorType);
     
    6457        Value * matchWord = iBuilder->CreateExtractElement(matchWordVector, ConstantInt::get(T, i));
    6558        Value * recordBreaksWord = iBuilder->CreateExtractElement(breakWordVector, ConstantInt::get(T, i));
    66         Value * wordResult = iBuilder->CreateCall(scanWordFunction, {kernelStuctParam, matchWord, recordBreaksWord, scanwordPos, recordStart, recordNum});
     59        Value * wordResult = iBuilder->CreateCall(scanWordFunction, {self, matchWord, recordBreaksWord, scanwordPos, recordStart, recordNum});
    6760        scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, T->getBitWidth()));
    6861        recordStart = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({0}));
    6962        recordNum = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({1}));
    7063    }
    71     setScalarField(kernelStuctParam, "LineStart", recordStart);
    72     setScalarField(kernelStuctParam, "LineNum", recordNum);
    73     iBuilder -> CreateRetVoid();
    74     iBuilder->restoreIP(savePoint);
     64    setScalarField(self, "LineStart", recordStart);
     65    setScalarField(self, "LineNum", recordNum);
    7566}
    7667
     
    253244}
    254245
    255 }
     246ScanMatchKernel::ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType)
     247: BlockOrientedKernel(iBuilder, "scanMatch",
     248    {Binding{iBuilder->getStreamSetTy(2, 1), "matchResults"}},
     249    {},
     250    {Binding{iBuilder->getInt8PtrTy(), "FileBuf"}, Binding{iBuilder->getSizeTy(), "FileSize"}, Binding{iBuilder->getSizeTy(), "FileIdx"}},
     251    {},
     252    {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineStart"}, Binding{iBuilder->getSizeTy(), "LineNum"}})
     253, mGrepType(grepType) {
     254
     255}
     256
     257}
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5283 r5285  
    1616class ScanMatchKernel : public BlockOrientedKernel {
    1717public:
    18     ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType) :
    19     BlockOrientedKernel(iBuilder, "scanMatch",
    20                   {Binding{iBuilder->getStreamSetTy(2, 1), "matchResults"}},
    21                     {},
    22                     {Binding{iBuilder->getInt8PtrTy(), "FileBuf"}, Binding{iBuilder->getSizeTy(), "FileSize"}, Binding{iBuilder->getSizeTy(), "FileIdx"}},
    23                     {},
    24                     {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineStart"}, Binding{iBuilder->getSizeTy(), "LineNum"}}),
    25 
    26     mGrepType(grepType) {}
    27        
     18    ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType);
     19protected:
     20    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    2821private:
    29 
    30     void generateDoBlockMethod() const override;
    31 
    3222    llvm::Function * generateScanWordRoutine(llvm::Module * m) const;
    33 
    3423private:
    3524    GrepType mGrepType;
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.h

    r5283 r5285  
    1717public:
    1818    StdInKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
    19    
     19protected:
     20    void generateDoSegmentMethod() const override final;
    2021private:
    2122    unsigned mSegmentBlocks;
    2223    unsigned mCodeUnitWidth;
    23  
    24     void generateDoSegmentMethod() const override;
    25    
    2624};
    2725   
     
    2927class FileSource : public SegmentOrientedKernel {
    3028public:
    31  
    3229    FileSource(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
    33  
    34 private:
    35  
    36     void generateInitMethod() const override;
    37     void generateDoSegmentMethod() const override;
    38  
     30protected:
     31    void generateInitMethod() const override final;
     32    void generateDoSegmentMethod() const override final;
    3933private:
    4034    unsigned mSegmentBlocks;
     
    4236 
    4337};
     38
    4439}
    4540
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5283 r5285  
    3434    Value * itemsToDo = iBuilder->CreateSub(producerPos, processed);
    3535   
    36     Value * blockNo = getScalarField(self, blockNoScalar);
     36    Value * blockNo = getBlockNo(self);
    3737    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    3838    Value * bytePtr = getStreamView(i8PtrTy, self, "codeUnitBuffer", blockNo, byteOffset);
     
    4141    processed = iBuilder->CreateAdd(processed, itemsToDo);
    4242    setProcessedItemCount(self, "codeUnitBuffer", processed);
    43     setScalarField(self, blockNoScalar, iBuilder->CreateUDiv(processed, blockItems));
     43    setBlockNo(self, iBuilder->CreateUDiv(processed, blockItems));
    4444
    4545    iBuilder->CreateRetVoid();
     
    102102    Value * IOstreamPtr = getScalarField(self, "IOstreamPtr");
    103103   
    104     Value * blockNo = getScalarField(self, blockNoScalar);
     104    Value * blockNo = getBlockNo(self);
    105105    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    106106    Value * bytePtr = getStreamView(i8PtrTy, self, "codeUnitBuffer", blockNo, byteOffset);
     
    109109    processed = iBuilder->CreateAdd(processed, itemsToDo);
    110110    setProcessedItemCount(self, "codeUnitBuffer", processed);
    111     setScalarField(self, blockNoScalar, iBuilder->CreateUDiv(processed, blockItems));
     111    setBlockNo(self, iBuilder->CreateUDiv(processed, blockItems));
    112112    iBuilder->CreateCondBr(doFinal, closeFile, fileOutExit);
    113113   
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.h

    r5283 r5285  
    1414class StdOutKernel : public SegmentOrientedKernel {
    1515public:
    16 
    17     StdOutKernel(IDISA::IDISA_Builder * iBuilder, unsigned codeUnitWidth);
    18    
     16    StdOutKernel(IDISA::IDISA_Builder * iBuilder, unsigned codeUnitWidth);
    1917private:
    20  
    21     void generateDoSegmentMethod() const override;
    22 
     18    void generateDoSegmentMethod() const override final;
    2319private:
    2420    const unsigned mCodeUnitWidth;
     
    2824
    2925class FileSink : public SegmentOrientedKernel {
    30 public:
    31    
     26public: 
    3227    FileSink(IDISA::IDISA_Builder * iBuilder, unsigned codeUnitWidth);
    33    
    34 private:
    35    
    36     void generateInitMethod() const override;
    37     void generateDoSegmentMethod() const override;
    38    
     28protected:
     29    void generateInitMethod() const override final;
     30    void generateDoSegmentMethod() const override final;
    3931private:
    4032    const unsigned mCodeUnitWidth;
  • icGREP/icgrep-devel/icgrep/pablo/builder.hpp

    r5283 r5285  
    248248    }
    249249
    250     inline String * getName(const llvm::StringRef & name) const {
    251         return mPb->getName(name);
    252     }
    253 
    254250    inline String * makeName(const llvm::StringRef & prefix) const {
    255251        return mPb->makeName(prefix);
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5267 r5285  
    8787    assert (scope);
    8888    if (mLoopDepth++ == 0) {
    89         Value * const blockNo = mKernel->getScalarField(mSelf, blockNoScalar);
     89        Value * const blockNo = mKernel->getBlockNo(mSelf);
    9090        mLoopSelector = iBuilder->CreateAnd(blockNo, ConstantInt::get(blockNo->getType(), 1));
    9191    }
     
    393393    // Create a mask to implement circular buffer indexing
    394394    Value * indexMask = iBuilder->getSize(nearest_pow2(entries) - 1);
    395     Value * blockIndex = mKernel->getScalarField(mSelf, blockNoScalar);
     395    Value * blockIndex = mKernel->getBlockNo(mSelf);
    396396    Value * carryIndex0 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(entries));
    397397    Value * loadIndex0 = iBuilder->CreateAnd(carryIndex0, indexMask);
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.h

    r5283 r5285  
    276276    }
    277277
    278     inline String * getName(const llvm::StringRef & name) const {
    279         return mParent->getName(name);
    280     }
    281 
    282278    inline String * makeName(const llvm::StringRef & prefix) const {
    283279        return mParent->makeName(prefix);
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5283 r5285  
    4848}
    4949   
    50 void PabloCompiler::compile(Value * const self, Function * function) {
    51 
    52     // Make sure that we generate code into the right module.
     50void PabloCompiler::compile(Function * function, Value * const self, Value * const blockNo) {
     51
    5352    mSelf = self;
     53
    5454    mFunction = function;
    55 
    56     //Generate Kernel//
    57     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", function, 0));
    5855
    5956    mCarryManager->initializeCodeGen(self, function);
     
    6259    mMarker.emplace(entryBlock->createZeroes(), iBuilder->allZeroes());
    6360    mMarker.emplace(entryBlock->createOnes(), iBuilder->allOnes());
    64 
    65     Value * const blockNo = mKernel->getScalarField(mSelf, blockNoScalar);
    6661
    6762    for (unsigned i = 0; i < mKernel->getNumOfInputs(); ++i) {
     
    482477            const unsigned block_shift = (l->getAmount() / iBuilder->getBitBlockWidth());
    483478            std::string inputName = cast<Var>(var)->getName().str();
    484             Value * blockNo = mKernel->getScalarField(mSelf, blockNoScalar);
     479            Value * blockNo = mKernel->getBlockNo(mSelf);
    485480            Value * lookAhead_blockPtr  = mKernel->getStreamSetPtr(mSelf, inputName, iBuilder->CreateAdd(blockNo, iBuilder->getSize(block_shift)));
    486481            Value * lookAhead_inputPtr = iBuilder->CreateGEP(lookAhead_blockPtr, {iBuilder->getInt32(0), iBuilder->getInt32(index)});
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r5283 r5285  
    3434    ~PabloCompiler();
    3535    void initializeKernelData();
    36     void compile(llvm::Value * const self, llvm::Function * doBlockFunction);
     36    void compile(llvm::Function * function, llvm::Value * const self, llvm::Value * const blockNo);
    3737
    3838private:
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5283 r5285  
    8888}
    8989
    90 void PabloKernel::generateDoBlockMethod() const {
    91     auto savePoint = iBuilder->saveIP();
    92     Module * const m = iBuilder->getModule();
    93     Function * const f = m->getFunction(mKernelName + doBlock_suffix);
    94     Value * const self = &*(f->arg_begin());
    95     mPabloCompiler->compile(self, f);
    96     iBuilder->CreateRetVoid();
    97     #ifndef NDEBUG
    98     llvm::verifyFunction(*f, &errs());
    99     #endif
    100     iBuilder->restoreIP(savePoint);
     90void PabloKernel::generateDoBlockMethod(Function * function, Value  *self, Value * blockNo) const {
     91    mPabloCompiler->compile(function, self, blockNo);
    10192}
    10293
    103 void PabloKernel::generateFinalBlockMethod() const {
    104     auto savePoint = iBuilder->saveIP();
    105     Module * m = iBuilder->getModule();
    106     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    107     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    108     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    109     // Final Block arguments: self, remaining, then the standard DoBlock args.
    110     Function::arg_iterator args = finalBlockFunction->arg_begin();
    111     Value * self = &*(args++);
    112     Value * remaining = &*(args++);
    113     std::vector<Value *> doBlockArgs = {self};
    114     while (args != finalBlockFunction->arg_end()){
    115         doBlockArgs.push_back(&*args++);
    116     }
     94void PabloKernel::generateFinalBlockMethod(Function * function, Value *self, Value *remainingBytes, Value *blockNo) const {
    11795    // Standard Pablo convention for final block processing: set a bit marking
    11896    // the position just past EOF, as well as a mask marking all positions past EOF.
    119     setScalarField(self, "EOFbit", iBuilder->bitblock_set_bit(remaining));
    120     setScalarField(self, "EOFmask", iBuilder->bitblock_mask_from(remaining));
    121     iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    122     iBuilder->CreateRetVoid();
    123     #ifndef NDEBUG
    124     llvm::verifyFunction(*finalBlockFunction, &errs());
    125     #endif
    126     iBuilder->restoreIP(savePoint);
     97    setScalarField(self, "EOFbit", iBuilder->bitblock_set_bit(remainingBytes));
     98    setScalarField(self, "EOFmask", iBuilder->bitblock_mask_from(remainingBytes));
     99    iBuilder->CreateCall(getDoBlockFunction(), { self });
    127100}
    128101
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.h

    r5283 r5285  
    114114    // so that the carry data requirements may be accommodated before
    115115    // finalizing the KernelStateType.
    116     void prepareKernel() override;
     116    void prepareKernel()  override final;
    117117
    118     void generateDoBlockMethod() const override;
    119    
     118    void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override final;
     119
    120120    // The default method for Pablo final block processing sets the
    121121    // EOFmark bit and then calls the standard DoBlock function.
    122122    // This may be overridden for specialized processing.
    123     virtual void generateFinalBlockMethod() const override;
    124 
    125     inline String * getName(const llvm::StringRef & name) const {
    126         return mSymbolTable->getString(name, iBuilder);
    127     }
     123    virtual void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const override final;
    128124
    129125    inline String * makeName(const llvm::StringRef & prefix) const {
  • icGREP/icgrep-devel/icgrep/pablo/symbol_generator.cpp

    r5283 r5285  
    1515    auto f = mPrefixMap.find(prefix);
    1616    if (f == mPrefixMap.end()) {   
    17         return getString(prefix, builder);
     17        char * const data = mAllocator.allocate<char>(prefix.size() + 1);
     18        std::memcpy(data, prefix.data(), prefix.size());
     19        data[prefix.size()] = '\0';
     20        llvm::StringRef name(data, prefix.size());
     21        mPrefixMap.insert(std::make_pair(name, 1));
     22        return new (mAllocator) String(builder->getInt8PtrTy(), name, mAllocator);
    1823    } else { // this string already exists; make a new string using the given prefix
    1924
     
    3944}
    4045
    41 String * SymbolGenerator::getString(const llvm::StringRef name, IDISA::IDISA_Builder * builder) noexcept {
    42     if (LLVM_UNLIKELY(name.size() == 0)) {
    43         throw std::runtime_error("symbol name cannot be 0-length");
    44     }
    45     const auto f = mStringMap.find(name);
    46     if (LLVM_LIKELY(f == mStringMap.end())) {
    47         assert ("prefix cannot exist for a non-existant key!" && (mPrefixMap.count(name) == 0));
    48         // create an internal copy of this name to prevent a temporary string from being added to the maps
    49         char * const data = mAllocator.allocate<char>(name.size() + 1);
    50         std::memcpy(data, name.data(), name.size());
    51         data[name.size()] = '\0';
    52         llvm::StringRef duplicate(data, name.size());
    53         mPrefixMap.insert(std::make_pair(duplicate, 1));
    54         String * result = new (mAllocator) String(builder->getInt8PtrTy(), duplicate, mAllocator); assert (result);
    55         mStringMap.insert(std::make_pair(duplicate, result));
    56         return result;
    57     }
    58     assert ("prefix must exist for a known key!" && (mPrefixMap.count(name) != 0));
    59     return f->second;
    60 }
    61 
    6246Integer * SymbolGenerator::getInteger(const IntTy value, IDISA::IDISA_Builder * builder) noexcept {
    6347    auto f = mIntegerMap.find(value);
     
    6650        result = new (mAllocator) Integer(value, builder->getSizeTy(), mAllocator);
    6751        assert (result->value() == value);
    68         mIntegerMap.emplace(std::make_pair(value, result));
     52        mIntegerMap.emplace(value, result);
    6953    } else {
    7054        result = f->second;
  • icGREP/icgrep-devel/icgrep/pablo/symbol_generator.h

    r5283 r5285  
    2323public:
    2424    using IntTy = int64_t;
    25     String * getString(const llvm::StringRef name, IDISA::IDISA_Builder * builder) noexcept;
    2625    String * makeString(const llvm::StringRef prefix, IDISA::IDISA_Builder * builder) noexcept;
    2726    Integer * getInteger(const IntTy value, IDISA::IDISA_Builder * builder) noexcept;
Note: See TracChangeset for help on using the changeset viewer.