Changeset 5603


Ignore:
Timestamp:
Aug 7, 2017, 11:03:08 AM (2 weeks ago)
Author:
lindanl
Message:

editd: group pattern segments.

Location:
icGREP/icgrep-devel/icgrep/editd
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5597 r5603  
    3030#include <mutex>
    3131#include <boost/uuid/sha1.hpp>
     32#include <editd/editd_cpu_kernel.h>
    3233
    3334#include <toolchain/NVPTXDriver.h>
     
    5051static cl::opt<int> stepSize("step-size", cl::desc("Step Size"), cl::init(3));
    5152static cl::opt<int> prefixLen("prefix", cl::desc("Prefix length"), cl::init(3));
     53static cl::opt<int> groupSize("groupPatterns", cl::desc("Number of pattern segments per group."), cl::init(1));
    5254static cl::opt<bool> ShowPositions("display", cl::desc("Display the match positions."), cl::init(false));
    5355
     
    5557
    5658static cl::opt<bool> MultiEditdKernels("enable-multieditd-kernels", cl::desc("Construct multiple editd kernels in one pipeline."));
     59static cl::opt<bool> EditdIndexPatternKernels("enable-index-kernels", cl::desc("Use pattern index method."));
    5760
    5861using namespace kernel;
     
    154157            pattFile.close();
    155158        }
    156         codegen::GroupNum = pattVector.size();
     159        codegen::GroupNum = pattVector.size()/groupSize;
    157160    }
    158161
     
    404407}
    405408
     409
     410void editdIndexPatternPipeline(ParabixDriver & pxDriver, unsigned patternLen) {
     411
     412    auto & idb = pxDriver.getBuilder();
     413    Module * const m = idb->getModule();
     414    Type * const sizeTy = idb->getSizeTy();
     415    Type * const voidTy = idb->getVoidTy();
     416    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(idb->getBitBlockType(), 8), 1), 0);
     417    Type * const patternPtrTy = PointerType::get(idb->getInt8Ty(), 0);
     418
     419    idb->LinkFunction("wrapped_report_pos", &wrapped_report_pos);
     420
     421    const unsigned segmentSize = codegen::SegmentSize;
     422    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     423
     424    Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, sizeTy, patternPtrTy, nullptr));
     425    main->setCallingConv(CallingConv::C);
     426    auto args = main->arg_begin();
     427    Value * const inputStream = &*(args++);
     428    inputStream->setName("input");
     429    Value * const fileSize = &*(args++);
     430    fileSize->setName("fileSize");
     431    Value * const pattStream = &*(args++);
     432    pattStream->setName("pattStream");
     433    idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
     434
     435    auto ChStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(4)));
     436    auto mmapK = pxDriver.addKernelInstance(make_unique<MemorySourceKernel>(idb, inputType, segmentSize));
     437    mmapK->setInitialArguments({inputStream, fileSize});
     438    pxDriver.makeKernelCall(mmapK, {}, {ChStream});
     439
     440    auto MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments));
     441    auto editdk = pxDriver.addKernelInstance(make_unique<kernel::editdCPUKernel>(idb, editDistance, patternLen, groupSize));
     442
     443    const unsigned numOfCarries = patternLen * (editDistance + 1) * 4 * groupSize;
     444    Type * strideCarryTy = ArrayType::get(idb->getBitBlockType(), numOfCarries);
     445    Value * strideCarry = idb->CreateAlloca(strideCarryTy);
     446    idb->CreateStore(Constant::getNullValue(strideCarryTy), strideCarry);
     447
     448    editdk->setInitialArguments({pattStream, strideCarry});
     449    pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
     450
     451    auto editdScanK = pxDriver.addKernelInstance(make_unique<editdScanKernel>(idb, editDistance));
     452    pxDriver.makeKernelCall(editdScanK, {MatchResults}, {});
     453
     454    pxDriver.generatePipelineIR();
     455
     456    idb->CreateRetVoid();
     457
     458    pxDriver.finalizeObject();
     459}
     460
    406461typedef void (*preprocessFunctionType)(const int fd, char * output_data);
    407462
     
    409464
    410465typedef void (*multiEditdFunctionType)(const int fd);
     466
     467typedef void (*editdIndexFunctionType)(char * byte_data, size_t filesize, const char * pattern);
    411468
    412469static char * chStream;
     
    468525    pthread_exit(NULL);
    469526}
    470 
    471 #define GROUPTHREADS 64
    472527
    473528void editdGPUCodeGen(unsigned patternLen){
     
    511566    Value * inputThreadPtr = iBuilder->CreateGEP(inputStream, tid);
    512567    Value * strides = iBuilder->CreateLoad(stridesPtr);
    513     Value * outputBlocks = iBuilder->CreateMul(strides, ConstantInt::get(int32ty, GROUPTHREADS));
     568    Value * outputBlocks = iBuilder->CreateMul(strides, ConstantInt::get(int32ty, iBuilder->getStride() / iBuilder->getBitBlockWidth()));
    514569    Value * resultStreamPtr = iBuilder->CreateGEP(resultStream, iBuilder->CreateAdd(iBuilder->CreateMul(bid, outputBlocks), tid));
    515570    Value * inputSize = iBuilder->CreateLoad(inputSizePtr);
     
    521576
    522577    ExternalBuffer * ResultStream = pxDriver.addExternalBuffer(make_unique<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(editDistance+1), resultStreamPtr, 1));   
    523     kernel::Kernel * editdk = pxDriver.addKernelInstance(make_unique<kernel::editdGPUKernel>(iBuilder, editDistance, patternLen));
     578    kernel::Kernel * editdk = pxDriver.addKernelInstance(make_unique<kernel::editdGPUKernel>(iBuilder, editDistance, patternLen, groupSize));
    524579     
    525     const unsigned numOfCarries = patternLen * (editDistance + 1) * 4;
     580    const unsigned numOfCarries = patternLen * (editDistance + 1) * 4 * groupSize;
    526581    Type * strideCarryTy = ArrayType::get(mBitBlockType, numOfCarries);
    527582    Value * strideCarry = iBuilder->CreateAlloca(strideCarryTy);
     
    607662
    608663    const unsigned segmentSize = codegen::SegmentSize;
     664    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    609665
    610666    Type * mBitBlockType = iBuilder->getBitBlockType();
     
    622678    fileSize->setName("fileSize");
    623679
    624 
    625680    StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(editDistance+1)));
    626     kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(iBuilder, inputType, segmentSize));
     681    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(iBuilder, inputType, segmentSize * bufferSegments));
    627682    sourceK->setInitialArguments({inputStream, fileSize});
    628683    pxDriver.makeKernelCall(sourceK, {}, {MatchResults});
     
    667722
    668723#ifdef CUDA_ENABLED
    669     codegen::BlockSize = 64;
     724    if (codegen::NVPTX)
     725        codegen::BlockSize = 64;
    670726#endif
    671727
     
    685741        std::string patterns((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
    686742
    687         editdGPUCodeGen(patterns.length()/codegen::GroupNum - 1);
     743        editdGPUCodeGen(pattVector[0].length());
    688744        mergeGPUCodeGen();
    689745        ulong * rslt = RunPTX(PTXFilename, chStream, size, patterns.c_str(), patterns.length(), editDistance);
     
    708764    }
    709765    else{
    710         if (Threads == 1) {     
    711             for(unsigned i=0; i<pattGroups.size(); i++){
    712 
     766        if (Threads == 1) {
     767            if (EditdIndexPatternKernels) {
    713768                ParabixDriver pxDriver("editd");
    714                 editdPipeline(pxDriver, pattGroups[i]);
    715                 auto editd_ptr = reinterpret_cast<editdFunctionType>(pxDriver.getMain());
    716                 editd(editd_ptr, chStream, size);
     769                editdIndexPatternPipeline(pxDriver, pattVector[0].length());
     770                auto editd_ptr = reinterpret_cast<editdIndexFunctionType>(pxDriver.getMain());
     771
     772                for(unsigned i=0; i<pattVector.size(); i+=groupSize){
     773                    std::string pattern = "";
     774                    for (int j=0; j<groupSize; j++){
     775                        pattern += pattVector[i+j];
     776                    }
     777                    editd_ptr(chStream, size, pattern.c_str());
     778                }
     779            }
     780            else {
     781                for(unsigned i=0; i<pattGroups.size(); i++){
     782
     783                    ParabixDriver pxDriver("editd");
     784                    editdPipeline(pxDriver, pattGroups[i]);
     785                    auto editd_ptr = reinterpret_cast<editdFunctionType>(pxDriver.getMain());
     786                    editd(editd_ptr, chStream, size);
     787                }
    717788            }
    718789        }
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.cpp

    r5602 r5603  
    2626}
    2727
     28void editdCPUKernel::reset_to_zero(std::vector<std::vector<int>> & calculated){
     29    for (auto & sub : calculated) {
     30        std::fill(sub.begin(), sub.end(), 0);
     31    }
     32}
     33
    2834void editdCPUKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    2935
     
    4046    std::vector<std::vector<int>> calculated(mPatternLen, std::vector<int>(mEditDistance + 1, 0));
    4147    Value * pattPos = idb->getInt32(0);
    42     Value * pattCh = idb->CreateLoad(idb->CreateGEP(pattStartPtr, pattPos));
    43     Value * pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    44     Value * pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
    45     pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
    46 
    47     e[0][0] = pattStream;
    48     for(unsigned j = 1; j <= mEditDistance; j++){
    49       e[0][j] = idb->allOnes();
     48    for(unsigned j = 0; j <= mEditDistance; j++){
     49        e[mPatternLen][j] = idb->allZeroes();
    5050    }
    5151
    52     for(unsigned i = 1; i < mPatternLen; i++){
    53         pattCh = idb->CreateLoad(idb->CreateGEP(pattStartPtr, pattPos));
    54         pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
     52    for(unsigned j = 1; j <= mEditDistance; j++){
     53        e[0][j] = idb->allOnes();
     54    }
     55
     56    for(unsigned g = 0; g < mGroupSize; g++){
     57        Value * pattCh = idb->CreateLoad(idb->CreateGEP(pattStartPtr, pattPos));
     58        Value * pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    5559        Value * pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
     60        pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     61       
     62        e[0][0] = pattStream;
     63        for(unsigned i = 1; i < mPatternLen; i++){
     64            pattCh = idb->CreateLoad(idb->CreateGEP(pattStartPtr, pattPos));
     65            pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
     66            pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
     67            pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     68            bitblock_advance_ci_co(idb, e[i-1][0], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, 0);
     69            e[i][0] = idb->CreateAnd(adv[i-1][0], pattStream);
     70            for(unsigned j = 1; j<= mEditDistance; j++){
     71                bitblock_advance_ci_co(idb, e[i-1][j], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j);
     72                bitblock_advance_ci_co(idb, e[i-1][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j-1);
     73                bitblock_advance_ci_co(idb, e[i][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i, j-1);
     74                Value * tmp1 = idb->CreateAnd(adv[i-1][j], pattStream);
     75                Value * tmp2 = idb->CreateAnd(adv[i-1][j-1], idb->CreateNot(pattStream));
     76                Value * tmp3 = idb->CreateOr(adv[i][j-1], e[i-1][j-1]);
     77                e[i][j] = idb->CreateOr(idb->CreateOr(tmp1, tmp2), tmp3);
     78            }
     79        }
     80        e[mPatternLen][0] = idb->CreateOr(e[mPatternLen][0], e[mPatternLen-1][0]);
     81        for(unsigned j = 1; j<= mEditDistance; j++){
     82            e[mPatternLen][j] = idb->CreateOr(e[mPatternLen][j], idb->CreateAnd(e[mPatternLen - 1][j], idb->CreateNot(e[mPatternLen - 1][j - 1])));
     83        }
     84        reset_to_zero(calculated);
     85    }
    5686
    57         bitblock_advance_ci_co(idb, e[i-1][0], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, 0);
    58         e[i][0] = idb->CreateAnd(adv[i-1][0], pattStream);
    59         for(unsigned j = 1; j<= mEditDistance; j++){
    60             bitblock_advance_ci_co(idb, e[i-1][j], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j);
    61             bitblock_advance_ci_co(idb, e[i-1][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j-1);
    62             bitblock_advance_ci_co(idb, e[i][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i, j-1);
    63             Value * tmp1 = idb->CreateAnd(adv[i-1][j], pattStream);
    64             Value * tmp2 = idb->CreateAnd(adv[i-1][j-1], idb->CreateNot(pattStream));
    65             Value * tmp3 = idb->CreateOr(adv[i][j-1], e[i-1][j-1]);
    66             e[i][j] = idb->CreateOr(idb->CreateOr(tmp1, tmp2), tmp3);
    67 
    68         }
    69         pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
    70     }
    71    
    72     idb->storeOutputStreamBlock("ResultStream", idb->getInt32(0), e[mPatternLen-1][0]);
    73     for(unsigned j = 1; j<= mEditDistance; j++){
    74         idb->storeOutputStreamBlock("ResultStream", idb->getInt32(j), idb->CreateAnd(e[mPatternLen-1][j], idb->CreateNot(e[mPatternLen-1][j-1])));
     87    for(unsigned j = 0; j<= mEditDistance; j++){
     88        idb->storeOutputStreamBlock("ResultStream", idb->getInt32(j), e[mPatternLen][j]);
    7589    }
    7690}
     
    8195}
    8296
    83 editdCPUKernel::editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen) :
     97editdCPUKernel::editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen, unsigned groupSize) :
    8498BlockOrientedKernel("editd_cpu",
    8599             {Binding{b->getStreamSetTy(4), "CCStream"}},
    86100             {Binding{b->getStreamSetTy(dist + 1), "ResultStream"}},
    87101             {Binding{PointerType::get(b->getInt8Ty(), 1), "pattStream"},
    88              Binding{PointerType::get(ArrayType::get(b->getBitBlockType(), pattLen * (dist + 1) * 4), 0), "strideCarry"}},
     102             Binding{PointerType::get(ArrayType::get(b->getBitBlockType(), pattLen * (dist + 1) * 4 * groupSize), 0), "strideCarry"}},
    89103             {},
    90104             {Binding{b->getBitBlockType(), "EOFmask"}}),
    91105mEditDistance(dist),
    92 mPatternLen(pattLen){
     106mPatternLen(pattLen),
     107mGroupSize(groupSize){
    93108}
    94109
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.h

    r5440 r5603  
    1717public:
    1818
    19     editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen);
     19    editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen, unsigned groupSize);
    2020   
    2121
     
    2424    void generateFinalBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & idb, llvm::Value * remainingBytes) override;
    2525    void bitblock_advance_ci_co(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * val, unsigned shift, llvm::Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<llvm::Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j) const;
     26    void reset_to_zero(std::vector<std::vector<int>> & calculated);
    2627    unsigned mEditDistance;
    2728    unsigned mPatternLen;
     29    unsigned mGroupSize;
    2830   
    2931};
  • icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.cpp

    r5440 r5603  
    66#include <kernels/kernel_builder.h>
    77#include <llvm/IR/Module.h>
    8 
     8#include <iostream>
    99using namespace llvm;
    1010
     
    2222}
    2323
     24void reset_to_zero(std::vector<std::vector<int>> & calculated){
     25    for (auto & sub : calculated) {
     26        std::fill(sub.begin(), sub.end(), 0);
     27    }
     28}
     29
    2430void editdGPUKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) {
    2531
    2632    IntegerType * const int32ty = idb->getInt32Ty();
    27     IntegerType * const int8ty = idb->getInt8Ty();
    28     Value * pattLen = idb->getInt32(mPatternLen + 1);
     33    IntegerType * const int8ty = idb->getInt8Ty(); 
     34    Value * groupLen = idb->getInt32((mPatternLen + 1) * mGroupSize);
    2935    Value * pattPos = idb->getInt32(0);
    3036    Value * pattBuf = idb->getScalarField("pattStream");
     
    3339    unsigned carryIdx = 0;
    3440
    35     std::vector<std::vector<Value *>> e(mPatternLen, std::vector<Value *>(mEditDistance + 1));
     41    std::vector<std::vector<Value *>> e(mPatternLen + 1, std::vector<Value *>(mEditDistance + 1));
    3642    std::vector<std::vector<Value *>> adv(mPatternLen, std::vector<Value *>(mEditDistance + 1));
    3743    std::vector<std::vector<int>> calculated(mPatternLen, std::vector<int>(mEditDistance + 1, 0));
     
    4046    Function * bidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
    4147    Value * bid = idb->CreateCall(bidFunc);
    42     Value * pattStartPtr = idb->CreateGEP(pattBuf, idb->CreateMul(pattLen, bid));
    43     Value * pattPtr = idb->CreateGEP(pattStartPtr, pattPos);
    44     Value * pattCh = idb->CreateLoad(pattPtr);
    45     Value * pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    46     Value * pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
    47     pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     48    Value * pattStartPtr = idb->CreateGEP(pattBuf, idb->CreateMul(groupLen, bid));
    4849
    49     e[0][0] = pattStream;
     50    for(unsigned j = 0; j <= mEditDistance; j++){
     51        e[mPatternLen][j] = idb->allZeroes();
     52    }
     53
    5054    for(unsigned j = 1; j <= mEditDistance; j++){
    5155        e[0][j] = idb->allOnes();
    5256    }
    53     for(unsigned i = 1; i < mPatternLen; i++){
    54         pattPtr = idb->CreateGEP(pattStartPtr, pattPos);
    55         pattCh = idb->CreateLoad(pattPtr);
    56         pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    57         pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
    58         bitblock_advance_ci_co(idb, e[i-1][0], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, 0);
    59         e[i][0] = idb->CreateAnd(adv[i-1][0], pattStream);
     57
     58    for(unsigned g = 0; g < mGroupSize; g++){
     59        Value * pattCh = idb->CreateLoad(idb->CreateGEP(pattStartPtr, pattPos));
     60        Value * pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
     61        Value * pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
     62        pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     63        e[0][0] = pattStream;
     64        for(unsigned i = 1; i < mPatternLen; i++){
     65            Value * pattCh = idb->CreateLoad(idb->CreateGEP(pattStartPtr, pattPos));
     66            pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
     67            pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
     68            pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     69            bitblock_advance_ci_co(idb, e[i-1][0], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, 0);
     70            e[i][0] = idb->CreateAnd(adv[i-1][0], pattStream);
     71            for(unsigned j = 1; j<= mEditDistance; j++){
     72                bitblock_advance_ci_co(idb, e[i-1][j], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j);
     73                bitblock_advance_ci_co(idb, e[i-1][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j-1);
     74                bitblock_advance_ci_co(idb, e[i][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i, j-1);
     75                Value * tmp1 = idb->CreateAnd(adv[i-1][j], pattStream);
     76                Value * tmp2 = idb->CreateAnd(adv[i-1][j-1], idb->CreateNot(pattStream));
     77                Value * tmp3 = idb->CreateOr(adv[i][j-1], e[i-1][j-1]);
     78                e[i][j] = idb->CreateOr(idb->CreateOr(tmp1, tmp2), tmp3);
     79            }
     80        }
     81        e[mPatternLen][0] = idb->CreateOr(e[mPatternLen][0], e[mPatternLen-1][0]);
    6082        for(unsigned j = 1; j<= mEditDistance; j++){
    61             bitblock_advance_ci_co(idb, e[i-1][j], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j);
    62             bitblock_advance_ci_co(idb, e[i-1][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j-1);
    63             bitblock_advance_ci_co(idb, e[i][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i, j-1);
    64             Value * tmp1 = idb->CreateAnd(adv[i-1][j], pattStream);
    65             Value * tmp2 = idb->CreateAnd(adv[i-1][j-1], idb->CreateNot(pattStream));
    66             Value * tmp3 = idb->CreateOr(adv[i][j-1], e[i-1][j-1]);
    67             e[i][j] = idb->CreateOr(idb->CreateOr(tmp1, tmp2), tmp3);
     83            e[mPatternLen][j] = idb->CreateOr(e[mPatternLen][j], idb->CreateAnd(e[mPatternLen - 1][j], idb->CreateNot(e[mPatternLen - 1][j - 1])));
    6884        }
    6985        pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     86        reset_to_zero(calculated);
    7087    }
    71     idb->storeOutputStreamBlock("ResultStream", idb->getInt32(0), e[mPatternLen-1][0]);
    72     for(unsigned j = 1; j<= mEditDistance; j++){
    73         idb->storeOutputStreamBlock("ResultStream", idb->getInt32(j), idb->CreateAnd(e[mPatternLen - 1][j], idb->CreateNot(e[mPatternLen - 1][j - 1])));
     88
     89    for(unsigned j = 0; j<= mEditDistance; j++){
     90        idb->storeOutputStreamBlock("ResultStream", idb->getInt32(j), e[mPatternLen][j]);
    7491    }
    7592}
     
    8097}
    8198
    82 editdGPUKernel::editdGPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen) :
     99editdGPUKernel::editdGPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen, unsigned groupSize) :
    83100BlockOrientedKernel("editd_gpu",
    84101              {Binding{b->getStreamSetTy(4), "CCStream"}},
    85102              {Binding{b->getStreamSetTy(dist + 1), "ResultStream"}},
    86103              {Binding{PointerType::get(b->getInt8Ty(), 1), "pattStream"},
    87               Binding{PointerType::get(ArrayType::get(b->getBitBlockType(), pattLen * (dist + 1) * 4), 0), "strideCarry"}},
     104              Binding{PointerType::get(ArrayType::get(b->getBitBlockType(), pattLen * (dist + 1) * 4 * groupSize), 0), "strideCarry"}},
    88105              {},
    89106              {Binding{b->getBitBlockType(), "EOFmask"}})
    90107, mEditDistance(dist)
    91 , mPatternLen(pattLen) {
     108, mPatternLen(pattLen)
     109, mGroupSize(groupSize) {
    92110}
    93111
  • icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.h

    r5440 r5603  
    1717public:
    1818   
    19     editdGPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen);
     19    editdGPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen, unsigned groupSize);
    2020   
    2121   
     
    2525    unsigned mEditDistance;
    2626    unsigned mPatternLen;
     27    unsigned mGroupSize;
    2728   
    2829};   
Note: See TracChangeset for help on using the changeset viewer.