Ignore:
Timestamp:
Nov 2, 2018, 7:18:31 PM (9 months ago)
Author:
nmedfort
Message:

Initial version of PipelineKernel + revised StreamSet model.

Location:
icGREP/icgrep-devel/icgrep/editd
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r6047 r6184  
    2929#include <fcntl.h>
    3030#include <mutex>
    31 #include <boost/uuid/sha1.hpp>
    3231#include <editd/editd_cpu_kernel.h>
    33 
    34 #ifdef CUDA_ENABLED
    35 #include <toolchain/NVPTXDriver.h>
    36 #include <editd/editd_gpu_kernel.h>
    37 #include <editd/EditdCudaDriver.h>
    38 #endif
     32#include <kernels/pipeline_builder.h>
     33#include <util/aligned_allocator.h>
    3934
    4035using namespace llvm;
     
    6156using namespace kernel;
    6257using namespace pablo;
    63 using namespace parabix;
    64 
    65 #ifdef CUDA_ENABLED
    66 const static std::string PTXFilename = "editd.ptx";
    67 #endif
    6858
    6959struct matchPosition
     
    171161}
    172162
    173 inline static std::string sha1sum(const std::string & str) {
    174     char buffer[41];    // 40 hex-digits and the terminating null
    175     uint32_t digest[5]; // 160 bits in total
    176     boost::uuids::detail::sha1 sha1;
    177     sha1.process_bytes(str.c_str(), str.size());
    178     sha1.get_digest(digest);
    179     snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
    180              digest[0], digest[1], digest[2], digest[3], digest[4]);
    181     return std::string(buffer);
    182 }
    183 
    184 std::string createName(const std::vector<std::string> & patterns) {
    185     std::string name = "";
    186     for(unsigned i=0; i<patterns.size(); i++)
    187         name += patterns[i];
    188     return name + std::to_string(editDistance);
    189 }
    190 
    191 class PatternKernel final: public pablo::PabloKernel {
    192 public:
    193     PatternKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const std::vector<std::string> & patterns);
    194     std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    195     bool isCachable() const override { return true;}
    196 protected:
    197     void generatePabloMethod() override;
    198 private:
    199     const std::vector<std::string> & mPatterns;
    200 };
    201 
    202 PatternKernel::PatternKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const std::vector<std::string> & patterns)
    203 : PabloKernel(b, sha1sum(createName(patterns)), {{b->getStreamSetTy(4), "pat"}}, {{b->getStreamSetTy(editDistance + 1), "E"}})
    204 , mPatterns(patterns) { 
    205 }
    206 
    207 std::string PatternKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
    208     return getName();
    209 }
    210 
    211 void PatternKernel::generatePabloMethod() {
    212     PabloBuilder entry(getEntryScope());
    213     Var * const pat = getInputStreamVar("pat");
    214     PabloAST * basisBits[4];
    215     basisBits[0] = entry.createExtract(pat, 0, "A");
    216     basisBits[1] = entry.createExtract(pat, 1, "C");
    217     basisBits[2] = entry.createExtract(pat, 2, "T");
    218     basisBits[3] = entry.createExtract(pat, 3, "G");
    219     re::Pattern_Compiler pattern_compiler(*this);
    220     if (optPosition == 0) optPosition = editDistance + 6;
    221     pattern_compiler.compile(mPatterns, entry, basisBits, editDistance, optPosition, stepSize);
    222 }
    223 
    224 std::mutex store_mutex;
    225 extern "C" void wrapped_report_pos(size_t match_pos, int dist) {
    226     struct matchPosition curMatch;
    227     curMatch.pos = match_pos;
    228     curMatch.dist = dist;
    229 
    230     store_mutex.lock();
    231     matchList.push_back(curMatch);
    232     if(ShowPositions)
    233         std::cout << "pos: " << match_pos << ", dist:" << dist << "\n";
    234     store_mutex.unlock();
    235 }
    236 
    237 void editdPipeline(ParabixDriver & pxDriver, const std::vector<std::string> & patterns) {
    238 
    239     auto & idb = pxDriver.getBuilder();
    240     Module * const m = idb->getModule();
    241     Type * const sizeTy = idb->getSizeTy();
    242     Type * const voidTy = idb->getVoidTy();
    243     Type * const inputType = idb->getIntNTy(1)->getPointerTo();
    244 
    245     idb->LinkFunction("wrapped_report_pos", &wrapped_report_pos);
    246 
    247     const unsigned segmentSize = codegen::SegmentSize;
    248     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    249 
    250     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, sizeTy, nullptr));
    251     main->setCallingConv(CallingConv::C);
    252     auto args = main->arg_begin();
    253     Value * const inputStream = &*(args++);
    254     inputStream->setName("input");
    255     Value * const fileSize = &*(args++);
    256     fileSize->setName("fileSize");
    257     idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    258 
    259     auto ChStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(4));
    260     auto mmapK = pxDriver.addKernelInstance<MemorySourceKernel>(idb, 4, 1);
    261     mmapK->setInitialArguments({inputStream, fileSize});
    262     pxDriver.makeKernelCall(mmapK, {}, {ChStream});
    263 
    264     auto MatchResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    265     auto editdk = pxDriver.addKernelInstance<PatternKernel>(idb, patterns);
    266     pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
    267 
    268     auto editdScanK = pxDriver.addKernelInstance<editdScanKernel>(idb, editDistance);
    269     pxDriver.makeKernelCall(editdScanK, {MatchResults}, {});
    270 
    271     pxDriver.generatePipelineIR();
    272     pxDriver.deallocateBuffers();
    273     idb->CreateRetVoid();
    274 
    275     pxDriver.finalizeObject();
    276 }
     163typedef void (*preprocessFunctionType)(char * output_data, size_t output_size, const uint32_t fd);
     164
     165static char * chStream;
     166static size_t size;
     167
     168//class PreprocessPipeline : public PipelineKernel {
     169//public:
     170//    PreprocessPipeline(EngineInstance & driver, StreamSet * CCResults)
     171//     : PipelineKernel(driver,
     172//    {},
     173//    {Binding{"CCResults", CCResults}},
     174//    {Binding{driver.getBuilder()->getInt32Ty(), "fileDescriptor"}},
     175//    {}) {
     176
     177//    }
     178//};
    277179
    278180class PreprocessKernel final: public pablo::PabloKernel {
    279181public:
    280     PreprocessKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
     182    PreprocessKernel(const std::unique_ptr<KernelBuilder> & b, StreamSet * BasisBits, StreamSet * CCResults);
    281183    bool isCachable() const override { return true; }
    282184    bool hasSignature() const override { return false; }
     
    285187};
    286188
    287 PreprocessKernel::PreprocessKernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    288 : PabloKernel(b, "ccc", {{b->getStreamSetTy(8), "basis"}}, {{b->getStreamSetTy(4), "pat"}}) {
     189PreprocessKernel::PreprocessKernel(const std::unique_ptr<KernelBuilder> & b, StreamSet * BasisBits, StreamSet * CCResults)
     190: PabloKernel(b, "editd_preprocess", {{"basis", BasisBits}}, {{"pat", CCResults}}) {
    289191
    290192}
     
    304206}
    305207
    306 void preprocessPipeline(ParabixDriver & pxDriver) {
    307 
    308     auto & iBuilder = pxDriver.getBuilder();
    309     Module * m = iBuilder->getModule();
    310 
    311     Type * const voidTy = iBuilder->getVoidTy();
    312     Type * const int32Ty = iBuilder->getInt32Ty();
    313     Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 4), 0);
    314 
    315     const unsigned segmentSize = codegen::SegmentSize;
    316     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    317 
    318     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, int32Ty, outputType, nullptr));
    319     main->setCallingConv(CallingConv::C);
    320     Function::arg_iterator args = main->arg_begin();
    321 
    322     Value * const fileDescriptor = &*(args++);
    323     fileDescriptor->setName("fileDescriptor");
    324     Value * const outputStream = &*(args++);
    325     outputStream->setName("output");
    326 
    327     iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main));
    328 
    329     auto ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    330 
    331     auto mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
    332     mmapK->setInitialArguments({fileDescriptor});
    333     pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    334 
    335     auto BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    336     auto s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
    337     pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    338 
    339     auto CCResults = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(4), outputStream);
    340     auto ccck = pxDriver.addKernelInstance<PreprocessKernel>(iBuilder);
    341     // NOTE: CCResults are never consumed because they are written directly into an external buffer. This may make analysis difficult.
    342     pxDriver.makeKernelCall(ccck, {BasisBits}, {CCResults});
    343 
    344     pxDriver.generatePipelineIR();
    345     pxDriver.deallocateBuffers();
    346     iBuilder->CreateRetVoid();
    347 
    348     pxDriver.finalizeObject();
    349 }
    350 
    351 void multiEditdPipeline(ParabixDriver & pxDriver) {
    352     auto & idb = pxDriver.getBuilder();
    353     Module * const m = idb->getModule();
    354     Type * const voidTy = idb->getVoidTy();
    355     Type * const int32Ty = idb->getInt32Ty();
    356 
    357     idb->LinkFunction("wrapped_report_pos", &wrapped_report_pos);
    358 
    359     const unsigned segmentSize = codegen::SegmentSize;
    360     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    361 
    362     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, int32Ty, nullptr));
    363     main->setCallingConv(CallingConv::C);
    364     Function::arg_iterator args = main->arg_begin();
    365 
    366     Value * const fileDescriptor = &*(args++);
    367     fileDescriptor->setName("fileDescriptor");
    368 
    369     idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    370 
    371     auto ByteStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 8));
    372 
    373     auto mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(idb);
    374     mmapK->setInitialArguments({fileDescriptor});
    375     pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    376 
    377     auto ChStream = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), segmentSize * bufferSegments);
    378     auto ccck = pxDriver.addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "ccc",
    379         std::vector<re::CC *>{re::makeCC(re::makeCC(0x41), re::makeCC(0x61)),
    380                               re::makeCC(re::makeCC(0x43), re::makeCC(0x63)),
    381                               re::makeCC(re::makeCC(0x54), re::makeCC(0x74)),
    382                               re::makeCC(re::makeCC(0x47), re::makeCC(0x67))});
    383     pxDriver.makeKernelCall(ccck, {ByteStream}, {ChStream});
    384 
    385     const auto n = pattGroups.size();
    386    
    387     std::vector<StreamSetBuffer *> MatchResultsBufs(n);
    388    
    389     for(unsigned i = 0; i < n; ++i){
    390         auto MatchResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    391         auto editdk = pxDriver.addKernelInstance<PatternKernel>(idb, pattGroups[i]);
    392         pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
    393         MatchResultsBufs[i] = MatchResults;
    394     }
    395     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    396     if (n > 1) {
    397         MergedResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    398         kernel::Kernel * streamsMergeK = pxDriver.addKernelInstance<kernel::StreamsMerge>(idb, editDistance + 1, n);
    399         pxDriver.makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
    400     }
    401 
    402     auto editdScanK = pxDriver.addKernelInstance<editdScanKernel>(idb, editDistance);
    403     pxDriver.makeKernelCall(editdScanK, {MergedResults}, {});
    404 
    405     pxDriver.generatePipelineIR();
    406     pxDriver.deallocateBuffers();
    407     idb->CreateRetVoid();
    408 
    409     pxDriver.finalizeObject();
    410 }
    411 
    412 
    413 void editdIndexPatternPipeline(ParabixDriver & pxDriver, unsigned patternLen) {
    414 
    415     auto & idb = pxDriver.getBuilder();
    416     Module * const m = idb->getModule();
    417     Type * const sizeTy = idb->getSizeTy();
    418     Type * const voidTy = idb->getVoidTy();
    419     Type * const inputType = idb->getIntNTy(1)->getPointerTo();
    420     Type * const patternPtrTy = PointerType::get(idb->getInt8Ty(), 0);
    421 
    422     idb->LinkFunction("wrapped_report_pos", &wrapped_report_pos);
    423 
    424     const unsigned segmentSize = codegen::SegmentSize;
    425     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    426 
    427     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, sizeTy, patternPtrTy, nullptr));
    428     main->setCallingConv(CallingConv::C);
    429     auto args = main->arg_begin();
    430     Value * const inputStream = &*(args++);
    431     inputStream->setName("input");
    432     Value * const fileSize = &*(args++);
    433     fileSize->setName("fileSize");
    434     Value * const pattStream = &*(args++);
    435     pattStream->setName("pattStream");
    436     idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    437 
    438     auto ChStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(4));
    439     auto mmapK = pxDriver.addKernelInstance<MemorySourceKernel>(idb, 4, 1);
    440     mmapK->setInitialArguments({inputStream, fileSize});
    441     pxDriver.makeKernelCall(mmapK, {}, {ChStream});
    442 
    443     auto MatchResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    444     auto editdk = pxDriver.addKernelInstance<kernel::editdCPUKernel>(idb, editDistance, patternLen, groupSize);
    445 
    446     const unsigned numOfCarries = patternLen * (editDistance + 1) * 4 * groupSize;
    447     Type * strideCarryTy = ArrayType::get(idb->getBitBlockType(), numOfCarries);
    448     Value * strideCarry = idb->CreateAlloca(strideCarryTy);
    449     idb->CreateStore(Constant::getNullValue(strideCarryTy), strideCarry);
    450 
    451     editdk->setInitialArguments({pattStream, strideCarry});
    452     pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
    453 
    454     auto editdScanK = pxDriver.addKernelInstance<editdScanKernel>(idb, editDistance);
    455     pxDriver.makeKernelCall(editdScanK, {MatchResults}, {});
    456 
    457     pxDriver.generatePipelineIR();
    458 
    459     idb->CreateRetVoid();
    460 
    461     pxDriver.finalizeObject();
    462 }
    463 
    464 typedef void (*preprocessFunctionType)(const int fd, char * output_data);
    465 
    466 typedef void (*editdFunctionType)(char * byte_data, size_t filesize);
    467 
    468 typedef void (*multiEditdFunctionType)(const int fd);
    469 
    470 typedef void (*editdIndexFunctionType)(char * byte_data, size_t filesize, const char * pattern);
    471 
    472 static char * chStream;
    473 static size_t size;
     208preprocessFunctionType preprocessPipeline(CPUDriver & pxDriver) {
     209    StreamSet * const CCResults = pxDriver.CreateStreamSet(4);
     210    auto & b = pxDriver.getBuilder();
     211    Type * const int32Ty = b->getInt32Ty();
     212    auto P = pxDriver.makePipelineWithIO({}, {{"CCResults", CCResults}}, {{int32Ty, "fileDescriptor"}});
     213    Scalar * const fileDescriptor = P->getInputScalar("fileDescriptor");
     214    StreamSet * const ByteStream = P->CreateStreamSet(1, 8);
     215    P->CreateKernelCall<MMapSourceKernel>(fileDescriptor, ByteStream);
     216    StreamSet * const BasisBits = P->CreateStreamSet(8);
     217    P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits);
     218    P->CreateKernelCall<PreprocessKernel>(BasisBits, CCResults);
     219    return reinterpret_cast<preprocessFunctionType>(P->compile());
     220}
    474221
    475222size_t file_size(const int fd) {
     
    481228}
    482229
    483 char * preprocess(preprocessFunctionType fn_ptr) {
     230#define ALIGNMENT (32UL)
     231
     232inline bool is_power_2(const unsigned n) {
     233    return ((n & (n - 1)) == 0) && n;
     234}
     235
     236inline unsigned round_up_to(const unsigned x, const unsigned y) {
     237    assert(is_power_2(y));
     238    return (x + y - 1) & -y;
     239}
     240
     241char * preprocess(preprocessFunctionType preprocess) {
    484242    std::string fileName = inputFiles[0];
    485243    const int fd = open(inputFiles[0].c_str(), O_RDONLY);
    486244    if (LLVM_UNLIKELY(fd == -1)) {
    487         std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
     245        std::cerr << "Error: cannot open " << fileName << " for processing.\n";
    488246        exit(-1);
    489247    }
    490248    size = file_size(fd);
    491     int ret = posix_memalign((void**)&chStream, 32, size);
    492     if (ret) {
    493         std::cerr << "Cannot allocate memory for output.\n";
    494         exit(-2);
    495     }
    496     fn_ptr(fd, chStream);
     249
     250    // Given a 8-bit bytestream of length n, we need space for 4 bitstreams of length n ...
     251    AlignedAllocator<char, ALIGNMENT> alloc;
     252    const auto n = round_up_to(size, 8 * ALIGNMENT);
     253    chStream = alloc.allocate((4 * n) / 8);
     254    preprocess(chStream, n, fd);
    497255    close(fd);
    498256    return chStream;
    499257}
    500258
    501 void editd(editdFunctionType fn_ptr, char * inputStream, size_t size) {
    502     fn_ptr(inputStream, size);
     259
     260std::string createName(const std::vector<std::string> & patterns) {
     261    std::string name = "";
     262    for(unsigned i=0; i<patterns.size(); i++)
     263        name += patterns[i];
     264    return name + std::to_string(editDistance);
     265}
     266
     267class PatternKernel final: public pablo::PabloKernel {
     268public:
     269    PatternKernel(const std::unique_ptr<KernelBuilder> & b, const std::vector<std::string> & patterns, StreamSet * pat, StreamSet * E);
     270    std::string makeSignature(const std::unique_ptr<KernelBuilder> &) override;
     271    bool isCachable() const override { return true;}
     272protected:
     273    void generatePabloMethod() override;
     274private:
     275    const std::vector<std::string> & mPatterns;
     276};
     277
     278PatternKernel::PatternKernel(const std::unique_ptr<KernelBuilder> & b, const std::vector<std::string> & patterns, StreamSet * pat, StreamSet * E)
     279: PabloKernel(b, getStringHash(createName(patterns)),
     280{{"pat", pat}},
     281{{"E", E}})
     282, mPatterns(patterns) {
     283
     284}
     285
     286std::string PatternKernel::makeSignature(const std::unique_ptr<KernelBuilder> &) {
     287    return getName();
     288}
     289
     290void PatternKernel::generatePabloMethod() {
     291    PabloBuilder entry(getEntryScope());
     292    Var * const pat = getInputStreamVar("pat");
     293    PabloAST * basisBits[4];
     294    basisBits[0] = entry.createExtract(pat, 0, "A");
     295    basisBits[1] = entry.createExtract(pat, 1, "C");
     296    basisBits[2] = entry.createExtract(pat, 2, "T");
     297    basisBits[3] = entry.createExtract(pat, 3, "G");
     298    re::Pattern_Compiler pattern_compiler(*this);
     299    if (optPosition == 0) optPosition = editDistance + 6;
     300    pattern_compiler.compile(mPatterns, entry, basisBits, editDistance, optPosition, stepSize);
     301}
     302
     303std::mutex store_mutex;
     304extern "C" void wrapped_report_pos(size_t match_pos, int dist) {
     305    struct matchPosition curMatch;
     306    curMatch.pos = match_pos;
     307    curMatch.dist = dist;
     308
     309    store_mutex.lock();
     310    matchList.push_back(curMatch);
     311    if(ShowPositions)
     312        std::cout << "pos: " << match_pos << ", dist:" << dist << "\n";
     313    store_mutex.unlock();
     314}
     315
     316typedef void (*editdFunctionType)(char * byte_data, size_t filesize);
     317
     318editdFunctionType editdPipeline(CPUDriver & pxDriver, const std::vector<std::string> & patterns) {
     319    auto & b = pxDriver.getBuilder();
     320    Type * const sizeTy = b->getSizeTy();
     321    Type * const inputType = b->getIntNTy(1)->getPointerTo();
     322    auto P = pxDriver.makePipeline({Binding{inputType, "input"}, Binding{sizeTy, "fileSize"}});
     323    Scalar * const inputStream = P->getInputScalar("input");
     324    Scalar * const fileSize = P->getInputScalar("fileSize");
     325    b->LinkFunction("wrapped_report_pos", wrapped_report_pos);
     326    StreamSet * const ChStream = P->CreateStreamSet(4);
     327    P->CreateKernelCall<MemorySourceKernel>(inputStream, fileSize, ChStream);
     328    StreamSet * const MatchResults = P->CreateStreamSet(editDistance + 1);
     329    P->CreateKernelCall<PatternKernel>(patterns, ChStream, MatchResults);
     330    P->CreateKernelCall<editdScanKernel>(MatchResults);
     331    return reinterpret_cast<editdFunctionType>(P->compile());
     332}
     333
     334typedef void (*multiEditdFunctionType)(const int fd);
     335
     336multiEditdFunctionType multiEditdPipeline(CPUDriver & pxDriver) {
     337
     338    auto & b = pxDriver.getBuilder();
     339    auto P = pxDriver.makePipeline({Binding{b->getInt32Ty(), "fileDescriptor"}});
     340    b->LinkFunction("wrapped_report_pos", wrapped_report_pos);
     341    Scalar * const fileDescriptor = P->getInputScalar("fileDescriptor");
     342
     343    StreamSet * const ByteStream = P->CreateStreamSet(1, 8);
     344    P->CreateKernelCall<MMapSourceKernel>(fileDescriptor, ByteStream);
     345
     346    std::vector<re::CC *> ccs;
     347    ccs.emplace_back(re::makeCC(re::makeCC(0x41), re::makeCC(0x61)));
     348    ccs.emplace_back(re::makeCC(re::makeCC(0x43), re::makeCC(0x63)));
     349    ccs.emplace_back(re::makeCC(re::makeCC(0x47), re::makeCC(0x67)));
     350    ccs.emplace_back(re::makeCC(re::makeCC(0x54), re::makeCC(0x74)));
     351
     352    StreamSet * const ChStream = P->CreateStreamSet(4);
     353    P->CreateKernelCall<DirectCharacterClassKernelBuilder>("editd_cc", ccs, ByteStream, ChStream);
     354
     355    const auto n = pattGroups.size();
     356    std::vector<StreamSet *> MatchResults(n);
     357    for(unsigned i = 0; i < n; ++i){
     358        MatchResults[i] = P->CreateStreamSet(editDistance + 1);
     359        P->CreateKernelCall<PatternKernel>(pattGroups[i], ChStream, MatchResults[i]);
     360    }
     361
     362    StreamSet * MergedResults = MatchResults[0];
     363    if (n > 1) {
     364        StreamSet * const MergedResults = P->CreateStreamSet();
     365        P->CreateKernelCall<StreamsMerge>(MatchResults, MergedResults);
     366    }
     367    P->CreateKernelCall<editdScanKernel>(MergedResults);
     368
     369    return reinterpret_cast<multiEditdFunctionType>(P->compile());
     370}
     371
     372typedef void (*editdIndexFunctionType)(char * byte_data, size_t filesize, const char * pattern);
     373
     374editdIndexFunctionType editdIndexPatternPipeline(CPUDriver & pxDriver, unsigned patternLen) {
     375
     376    auto & b = pxDriver.getBuilder();
     377
     378    Type * const inputType = b->getIntNTy(1)->getPointerTo();
     379    Type * const sizeTy = b->getSizeTy();
     380    Type * const patternPtrTy = PointerType::get(b->getInt8Ty(), 0);
     381
     382    auto P = pxDriver.makePipeline({Binding{inputType, "input"}, Binding{sizeTy, "fileSize"}, Binding{patternPtrTy, "pattStream"}});
     383    Scalar * const inputStream = P->getInputScalar("input");
     384    Scalar * const fileSize = P->getInputScalar("fileSize");
     385    Scalar * const pattStream = P->getInputScalar("pattStream");
     386
     387    b->LinkFunction("wrapped_report_pos", wrapped_report_pos);
     388
     389    StreamSet * const ChStream = P->CreateStreamSet(4);
     390    P->CreateKernelCall<MemorySourceKernel>(inputStream, fileSize, ChStream);
     391
     392    StreamSet * const MatchResults = P->CreateStreamSet(editDistance + 1);
     393
     394    P->CreateKernelCall<editdCPUKernel>(patternLen, groupSize, pattStream, ChStream, MatchResults);
     395
     396    P->CreateKernelCall<editdScanKernel>(MatchResults);
     397
     398    return reinterpret_cast<editdIndexFunctionType>(P->compile());
    503399}
    504400
     
    515411    while (groupIdx < pattGroups.size()){
    516412
    517         ParabixDriver pxDriver("editd");
    518         editdPipeline(pxDriver, pattGroups[groupIdx]);
    519         auto editd_ptr = reinterpret_cast<editdFunctionType>(pxDriver.getMain());
    520         editd(editd_ptr, chStream, size);
     413        CPUDriver pxDriver("editd");
     414        auto editd = editdPipeline(pxDriver, pattGroups[groupIdx]);
     415        editd(chStream, size);
    521416
    522417        count_mutex.lock();
     
    529424}
    530425
    531 #ifdef CUDA_ENABLED
    532 void editdGPUCodeGen(unsigned patternLen){
    533     NVPTXDriver pxDriver("editd");
    534     auto & iBuilder = pxDriver.getBuilder();
    535     Module * M = iBuilder->getModule();
    536 
    537     const unsigned segmentSize = codegen::SegmentSize;
    538 
    539     Type * const mBitBlockType = iBuilder->getBitBlockType();
    540     Type * const inputSizeTy = PointerType::get(iBuilder->getSizeTy(), 1);
    541     Type * const int32ty = iBuilder->getInt32Ty();
    542     Type * const voidTy = Type::getVoidTy(M->getContext());
    543     Type * const inputTy = PointerType::get(ArrayType::get(mBitBlockType, 4), 1);
    544     Type * const patternPtrTy = PointerType::get(iBuilder->getInt8Ty(), 1);
    545     Type * const outputTy = PointerType::get(ArrayType::get(mBitBlockType, editDistance+1), 1);
    546     Type * const stridesTy = PointerType::get(int32ty, 1);
    547 
    548     Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputTy, inputSizeTy, patternPtrTy, outputTy, stridesTy, nullptr));
    549     main->setCallingConv(CallingConv::C);
    550     auto args = main->arg_begin();
    551 
    552     Value * const inputStream = &*(args++);
    553     inputStream->setName("input");
    554     Value * const inputSizePtr = &*(args++);
    555     inputSizePtr->setName("inputSizePtr");
    556     Value * const pattStream = &*(args++);
    557     pattStream->setName("pattStream");
    558     Value * const resultStream = &*(args++);
    559     resultStream->setName("resultStream");
    560     Value * const stridesPtr = &*(args++);
    561     stridesPtr->setName("stridesPtr");
    562 
    563     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main,0));
    564 
    565     Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
    566     Value * tid = iBuilder->CreateCall(tidFunc);
    567     Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
    568     Value * bid = iBuilder->CreateCall(bidFunc);
    569 
    570     Value * inputThreadPtr = iBuilder->CreateGEP(inputStream, tid);
    571     Value * strides = iBuilder->CreateLoad(stridesPtr);
    572     Value * outputBlocks = iBuilder->CreateMul(strides, ConstantInt::get(int32ty, iBuilder->getStride() / iBuilder->getBitBlockWidth()));
    573     Value * resultStreamPtr = iBuilder->CreateGEP(resultStream, iBuilder->CreateAdd(iBuilder->CreateMul(bid, outputBlocks), tid));
    574     Value * inputSize = iBuilder->CreateLoad(inputSizePtr);
    575 
    576     auto CCStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(4), 1);
    577     auto sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(iBuilder, inputTy, segmentSize);
    578     sourceK->setInitialArguments({inputThreadPtr, inputSize});
    579     pxDriver.makeKernelCall(sourceK, {}, {CCStream});
    580 
    581     auto ResultStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(editDistance+1), resultStreamPtr, 1);
    582     auto editdk = pxDriver.addKernelInstance<kernel::editdGPUKernel>(iBuilder, editDistance, patternLen, groupSize);
    583      
    584     const unsigned numOfCarries = patternLen * (editDistance + 1) * 4 * groupSize;
    585     Type * strideCarryTy = ArrayType::get(mBitBlockType, numOfCarries);
    586     Value * strideCarry = iBuilder->CreateAlloca(strideCarryTy);
    587     iBuilder->CreateStore(Constant::getNullValue(strideCarryTy), strideCarry);
    588 
    589     editdk->setInitialArguments({pattStream, strideCarry});
    590     pxDriver.makeKernelCall(editdk, {CCStream}, {ResultStream});
    591 
    592     pxDriver.generatePipelineIR();
    593     pxDriver.deallocateBuffers();
    594     iBuilder->CreateRetVoid();
    595 
    596     pxDriver.finalizeObject();
    597 
    598 }
    599 
    600 void mergeGPUCodeGen(){
    601     NVPTXDriver pxDriver("merge");
    602     auto & iBuilder = pxDriver.getBuilder();
    603     Module * M = iBuilder->getModule();
    604 
    605     Type * const mBitBlockType = iBuilder->getBitBlockType();
    606     Type * const int32ty = iBuilder->getInt32Ty();
    607     Type * const voidTy = Type::getVoidTy(M->getContext());
    608     Type * const resultTy = PointerType::get(ArrayType::get(mBitBlockType, editDistance+1), 1);
    609     Type * const stridesTy = PointerType::get(int32ty, 1);
    610 
    611     Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, resultTy, stridesTy, nullptr));
    612     main->setCallingConv(CallingConv::C);
    613     Function::arg_iterator args = main->arg_begin();
    614 
    615     Value * const resultStream = &*(args++);
    616     resultStream->setName("resultStream");
    617     Value * const stridesPtr = &*(args++);
    618     stridesPtr->setName("stridesPtr");
    619 
    620     BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entryBlock", main, 0);
    621     BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", main, 0);
    622     BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", main, 0);
    623     BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", main, 0);
    624 
    625     iBuilder->SetInsertPoint(entryBlock);
    626 
    627     Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
    628     Value * tid = iBuilder->CreateCall(tidFunc);
    629     Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
    630     Value * bid = iBuilder->CreateCall(bidFunc);
    631 
    632     Value * strides = iBuilder->CreateLoad(stridesPtr);
    633     Value * strideBlocks = ConstantInt::get(int32ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
    634     Value * outputBlocks = iBuilder->CreateMul(strides, strideBlocks);
    635     Value * resultStreamPtr = iBuilder->CreateGEP(resultStream, tid);
    636 
    637     iBuilder->CreateBr(strideLoopCond);
    638     iBuilder->SetInsertPoint(strideLoopCond);
    639     PHINode * strideNo = iBuilder->CreatePHI(int32ty, 2, "strideNo");
    640     strideNo->addIncoming(ConstantInt::get(int32ty, 0), entryBlock);
    641     Value * notDone = iBuilder->CreateICmpULT(strideNo, strides);
    642     iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
    643 
    644     iBuilder->SetInsertPoint(strideLoopBody);
    645     Value * myResultStreamPtr = iBuilder->CreateGEP(resultStreamPtr, {iBuilder->CreateMul(strideBlocks, strideNo)});
    646     Value * myResultStream = iBuilder->CreateLoad(iBuilder->CreateGEP(myResultStreamPtr, {iBuilder->getInt32(0), bid}));
    647     for (int i=1; i<codegen::GroupNum; i++){
    648         Value * nextStreamPtr = iBuilder->CreateGEP(myResultStreamPtr, {iBuilder->CreateMul(outputBlocks, iBuilder->getInt32(i)), bid});
    649         myResultStream = iBuilder->CreateOr(myResultStream, iBuilder->CreateLoad(nextStreamPtr));
    650     }
    651     iBuilder->CreateStore(myResultStream, iBuilder->CreateGEP(myResultStreamPtr, {iBuilder->getInt32(0), bid}));
    652     strideNo->addIncoming(iBuilder->CreateAdd(strideNo, ConstantInt::get(int32ty, 1)), strideLoopBody);
    653     iBuilder->CreateBr(strideLoopCond);
    654 
    655     iBuilder->SetInsertPoint(stridesDone);
    656     iBuilder->CreateRetVoid();
    657 
    658     pxDriver.finalizeObject();
    659 
    660 }
    661 #endif
    662 
    663 editdFunctionType editdScanCPUCodeGen(ParabixDriver & pxDriver) {
    664    
    665     auto & iBuilder = pxDriver.getBuilder();
    666     Module * M = iBuilder->getModule();
    667 
    668     Type * mBitBlockType = iBuilder->getBitBlockType();
    669     Type * const size_ty = iBuilder->getSizeTy();
    670     Type * const voidTy = Type::getVoidTy(M->getContext());
    671     Type * const inputType = PointerType::get(ArrayType::get(mBitBlockType, editDistance+1), 0);
    672 
    673     Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, size_ty, nullptr));
    674     main->setCallingConv(CallingConv::C);
    675     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
    676     Function::arg_iterator args = main->arg_begin();
    677     Value * const inputStream = &*(args++);
    678     inputStream->setName("input");
    679     Value * const fileSize = &*(args++);
    680     fileSize->setName("fileSize");
    681 
    682     StreamSetBuffer * MatchResults = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(editDistance + 1));
    683     kernel::Kernel * sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(iBuilder, editDistance + 1, 8);
    684     sourceK->setInitialArguments({inputStream, fileSize});
    685     pxDriver.makeKernelCall(sourceK, {}, {MatchResults});
    686 
    687     auto editdScanK = pxDriver.addKernelInstance<editdScanKernel>(iBuilder, editDistance);
    688     pxDriver.makeKernelCall(editdScanK, {MatchResults}, {});
    689     pxDriver.LinkFunction(*editdScanK, "wrapped_report_pos", &wrapped_report_pos);
    690     pxDriver.generatePipelineIR();
    691     pxDriver.deallocateBuffers();
    692     iBuilder->CreateRetVoid();
    693 
    694     pxDriver.finalizeObject();
    695 
    696     return reinterpret_cast<editdFunctionType>(pxDriver.getMain());
    697 
    698 }
    699 
    700426int main(int argc, char *argv[]) {
    701427    codegen::ParseCommandLineOptions(argc, argv);
     
    706432
    707433    if (MultiEditdKernels) {
    708         ParabixDriver pxDriver("editd");
    709         multiEditdPipeline(pxDriver);
    710         auto editd_ptr = reinterpret_cast<multiEditdFunctionType>(pxDriver.getMain());
     434        CPUDriver pxDriver("editd");
     435        auto editd = multiEditdPipeline(pxDriver);
    711436
    712437        std::string fileName = inputFiles[0];
     
    716441            exit(-1);
    717442        }
    718         editd_ptr(fd);
     443        editd(fd);
    719444        close(fd);
    720445        run_second_filter(pattern_segs, total_len, 0.15);
     
    722447    }
    723448
    724 #ifdef CUDA_ENABLED
    725     if (codegen::NVPTX)
    726         codegen::BlockSize = 64;
    727 #endif
    728 
    729     ParabixDriver pxDriver("preprocess");
    730     preprocessPipeline(pxDriver);
    731     auto preprocess_ptr = reinterpret_cast<preprocessFunctionType>(pxDriver.getMain());
     449    CPUDriver pxDriver("preprocess");
     450    auto preprocess_ptr = preprocessPipeline(pxDriver);
    732451    preprocess(preprocess_ptr);
    733452
    734 #ifdef CUDA_ENABLED
    735     if(codegen::NVPTX){
    736 
    737         std::ifstream t(PatternFilename);
    738         if (!t.is_open()) {
    739             std::cerr << "Error: cannot open " << PatternFilename << " for processing. Skipped.\n";
    740             exit(-1);
    741         }
    742         std::string patterns((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
    743 
    744         editdGPUCodeGen(pattVector[0].length());
    745         mergeGPUCodeGen();
    746         ulong * rslt = RunPTX(PTXFilename, chStream, size, patterns.c_str(), patterns.length(), editDistance);
    747 
    748         ParabixDriver scanDriver("scan");
    749         editdFunctionType editd_ptr = editdScanCPUCodeGen(scanDriver);
    750         editd(editd_ptr, (char*)rslt, size);
    751 
    752         run_second_filter(pattern_segs, total_len, 0.15);
    753 
    754         return 0;
    755     }
    756 #endif
    757 
    758453    if(pattVector.size() == 1){
    759454
    760         ParabixDriver pxDriver("editd");
    761         editdPipeline(pxDriver, pattVector);
    762         auto editd_ptr = reinterpret_cast<editdFunctionType>(pxDriver.getMain());
    763         editd(editd_ptr, chStream, size);
     455        CPUDriver pxDriver("editd");
     456        auto editd = editdPipeline(pxDriver, pattVector);
     457        editd(chStream, size);
    764458        std::cout << "total matches is " << matchList.size() << std::endl;
    765459    }
    766460    else{
    767         if (Threads == 1) { 
     461        if (Threads == 1) {
    768462            if (EditdIndexPatternKernels) {
    769                 ParabixDriver pxDriver("editd");
    770                 editdIndexPatternPipeline(pxDriver, pattVector[0].length());
    771                 auto editd_ptr = reinterpret_cast<editdIndexFunctionType>(pxDriver.getMain());
     463                CPUDriver pxDriver("editd");
     464                auto editd_ptr = editdIndexPatternPipeline(pxDriver, pattVector[0].length());
    772465
    773466                for(unsigned i=0; i<pattVector.size(); i+=groupSize){
     
    782475                for(unsigned i=0; i<pattGroups.size(); i++){
    783476
    784                     ParabixDriver pxDriver("editd");
    785                     editdPipeline(pxDriver, pattGroups[i]);
    786                     auto editd_ptr = reinterpret_cast<editdFunctionType>(pxDriver.getMain());
    787                     editd(editd_ptr, chStream, size);
     477                    CPUDriver pxDriver("editd");
     478                    auto editd = editdPipeline(pxDriver, pattGroups[i]);
     479                    editd(chStream, size);
    788480                }
    789481            }
     
    813505    }
    814506
     507    AlignedAllocator<char, 32> alloc;
     508    alloc.deallocate(chStream, 0);
     509
    815510    return 0;
    816511}
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.cpp

    r5985 r6184  
    9595}
    9696
    97 editdCPUKernel::editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen, unsigned groupSize) :
    98 BlockOrientedKernel("editd_cpu",
    99              {Binding{b->getStreamSetTy(4), "CCStream"}},
    100              {Binding{b->getStreamSetTy(dist + 1), "ResultStream"}},
    101              {Binding{PointerType::get(b->getInt8Ty(), 1), "pattStream"},
    102              Binding{PointerType::get(ArrayType::get(b->getBitBlockType(), pattLen * (dist + 1) * 4 * groupSize), 0), "strideCarry"}},
    103              {},
    104              {Binding{b->getBitBlockType(), "EOFmask"}}),
    105 mEditDistance(dist),
    106 mPatternLen(pattLen),
    107 mGroupSize(groupSize){
     97editdCPUKernel::editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b,
     98                               const unsigned patternLen, const unsigned groupSize,
     99                               Scalar * const pattStream,
     100                               StreamSet * const CCStream, StreamSet * const ResultStream)
     101: BlockOrientedKernel("editd_cpu" + std::to_string(patternLen) + "x" + std::to_string(groupSize),
     102// input stream
     103{Binding{"CCStream", CCStream}},
     104// output stream
     105{Binding{"ResultStream", ResultStream}},
     106// input scalar
     107{Binding{"pattStream", pattStream}},
     108// output scalar
     109{},
     110// internal scalars
     111{Binding{b->getBitBlockType(), "EOFmask"},
     112 Binding{ArrayType::get(b->getBitBlockType(), (patternLen * groupSize * 4 * ResultStream->getNumElements())), "strideCarry"}}) {
     113
    108114}
    109115
    110116}
    111 
    112 
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.h

    r5603 r6184  
    1717public:
    1818
    19     editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned dist, unsigned pattLen, unsigned groupSize);
     19    editdCPUKernel(const std::unique_ptr<kernel::KernelBuilder> & b,
     20                   const unsigned patternLen, const unsigned groupSize,
     21                   Scalar * const pattStream,
     22                   StreamSet * const CCStream, StreamSet * const ResultStream);
    2023   
    2124
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.cpp

    r5440 r6184  
    1313namespace kernel {
    1414
    15 void editdScanKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    16     auto savePoint = idb->saveIP();
    17     Function * scanWordFunction = generateScanWordRoutine(idb);
    18     idb->restoreIP(savePoint);
     15void editdScanKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) {
     16    auto savePoint = b->saveIP();
     17    Function * scanWordFunction = generateScanWordRoutine(b);
     18    b->restoreIP(savePoint);
    1919
    20     const unsigned fieldCount = idb->getBitBlockWidth() / mScanwordBitWidth;
    21     Type * T = idb->getIntNTy(mScanwordBitWidth);
     20    const unsigned fieldCount = b->getBitBlockWidth() / mScanwordBitWidth;
     21    Type * T = b->getIntNTy(mScanwordBitWidth);
    2222    VectorType * scanwordVectorType =  VectorType::get(T, fieldCount);
    23     Value * blockNo = idb->getScalarField("BlockNo");
    24     Value * scanwordPos = idb->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), idb->getBitBlockWidth()));
     23    Value * blockNo = b->getScalarField("BlockNo");
     24    Value * scanwordPos = b->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), b->getBitBlockWidth()));
    2525   
    2626    std::vector<Value * > matchWordVectors;
    27     for(unsigned d = 0; d <= mEditDistance; d++) {
    28         Value * matches = idb->loadInputStreamBlock("matchResults", idb->getInt32(d));
    29         matchWordVectors.push_back(idb->CreateBitCast(matches, scanwordVectorType));
     27    for(unsigned d = 0; d < mNumElements; d++) {
     28        Value * matches = b->loadInputStreamBlock("matchResults", b->getInt32(d));
     29        matchWordVectors.push_back(b->CreateBitCast(matches, scanwordVectorType));
    3030    }
    3131   
    3232    for(unsigned i = 0; i < fieldCount; ++i) {
    33         for(unsigned d = 0; d <= mEditDistance; d++) {
    34             Value * matchWord = idb->CreateExtractElement(matchWordVectors[d], ConstantInt::get(T, i));
    35             idb->CreateCall(scanWordFunction, {matchWord, idb->getInt32(d), scanwordPos});
     33        for(unsigned d = 0; d < mNumElements; d++) {
     34            Value * matchWord = b->CreateExtractElement(matchWordVectors[d], ConstantInt::get(T, i));
     35            b->CreateCall(scanWordFunction, {matchWord, b->getInt32(d), scanwordPos});
    3636        }
    37         scanwordPos = idb->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
     37        scanwordPos = b->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
    3838    }
    3939
    40     idb->setScalarField("BlockNo", idb->CreateAdd(blockNo, idb->getSize(1)));
     40    b->setScalarField("BlockNo", b->CreateAdd(blockNo, b->getSize(1)));
    4141}
    4242
    43 Function * editdScanKernel::generateScanWordRoutine(const std::unique_ptr<KernelBuilder> &iBuilder) const {
     43Function * editdScanKernel::generateScanWordRoutine(const std::unique_ptr<KernelBuilder> &b) const {
    4444
    45     IntegerType * T = iBuilder->getIntNTy(mScanwordBitWidth);
    46     Module * const m = iBuilder->getModule();
     45    IntegerType * T = b->getIntNTy(mScanwordBitWidth);
     46    Module * const m = b->getModule();
    4747
    48     Function * scanFunc = cast<Function>(m->getOrInsertFunction("scan_word", iBuilder->getVoidTy(), T, iBuilder->getInt32Ty(), T, nullptr));
     48    Function * scanFunc = cast<Function>(m->getOrInsertFunction("scan_word", b->getVoidTy(), T, b->getInt32Ty(), T, nullptr));
    4949    scanFunc->setCallingConv(CallingConv::C);
    5050    Function::arg_iterator args = scanFunc->arg_begin();
     
    5757    basePos->setName("basePos");
    5858
    59     Constant * matchProcessor = m->getOrInsertFunction("wrapped_report_pos", iBuilder->getVoidTy(), T, iBuilder->getInt32Ty(), nullptr);
    60     BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", scanFunc, 0);
    61     BasicBlock * matchesCondBlock = BasicBlock::Create(iBuilder->getContext(), "matchesCond", scanFunc, 0);
    62     BasicBlock * matchesLoopBlock = BasicBlock::Create(iBuilder->getContext(), "matchesLoop", scanFunc, 0);
    63     BasicBlock * matchesDoneBlock = BasicBlock::Create(iBuilder->getContext(), "matchesDone", scanFunc, 0);
     59    Constant * matchProcessor = m->getOrInsertFunction("wrapped_report_pos", b->getVoidTy(), T, b->getInt32Ty(), nullptr);
     60    BasicBlock * entryBlock = BasicBlock::Create(b->getContext(), "entry", scanFunc, 0);
     61    BasicBlock * matchesCondBlock = BasicBlock::Create(b->getContext(), "matchesCond", scanFunc, 0);
     62    BasicBlock * matchesLoopBlock = BasicBlock::Create(b->getContext(), "matchesLoop", scanFunc, 0);
     63    BasicBlock * matchesDoneBlock = BasicBlock::Create(b->getContext(), "matchesDone", scanFunc, 0);
    6464
    65     iBuilder->SetInsertPoint(entryBlock);
    66     iBuilder->CreateBr(matchesCondBlock);
     65    b->SetInsertPoint(entryBlock);
     66    b->CreateBr(matchesCondBlock);
    6767
    68     iBuilder->SetInsertPoint(matchesCondBlock);
    69     PHINode * matches_phi = iBuilder->CreatePHI(T, 2, "matches");
     68    b->SetInsertPoint(matchesCondBlock);
     69    PHINode * matches_phi = b->CreatePHI(T, 2, "matches");
    7070    matches_phi->addIncoming(matchWord, entryBlock);
    71     Value * have_matches_cond = iBuilder->CreateICmpUGT(matches_phi, ConstantInt::get(T, 0));
    72     iBuilder->CreateCondBr(have_matches_cond, matchesLoopBlock, matchesDoneBlock);
     71    Value * have_matches_cond = b->CreateICmpUGT(matches_phi, ConstantInt::get(T, 0));
     72    b->CreateCondBr(have_matches_cond, matchesLoopBlock, matchesDoneBlock);
    7373
    74     iBuilder->SetInsertPoint(matchesLoopBlock);
    75     Value * match_pos = iBuilder->CreateAdd(iBuilder->CreateCountForwardZeroes(matches_phi), basePos);
    76     Value * matches_new = iBuilder->CreateAnd(matches_phi, iBuilder->CreateSub(matches_phi, ConstantInt::get(T, 1)));
     74    b->SetInsertPoint(matchesLoopBlock);
     75    Value * match_pos = b->CreateAdd(b->CreateCountForwardZeroes(matches_phi), basePos);
     76    Value * matches_new = b->CreateAnd(matches_phi, b->CreateSub(matches_phi, ConstantInt::get(T, 1)));
    7777    matches_phi->addIncoming(matches_new, matchesLoopBlock);
    78     iBuilder->CreateCall(matchProcessor, std::vector<Value *>({match_pos, dist}));
    79     iBuilder->CreateBr(matchesCondBlock);
     78    b->CreateCall(matchProcessor, std::vector<Value *>({match_pos, dist}));
     79    b->CreateBr(matchesCondBlock);
    8080
    81     iBuilder->SetInsertPoint(matchesDoneBlock);
    82     iBuilder -> CreateRetVoid();
     81    b->SetInsertPoint(matchesDoneBlock);
     82    b -> CreateRetVoid();
    8383
    8484    return scanFunc;
     
    8686}
    8787
    88 editdScanKernel::editdScanKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned dist) :
    89 BlockOrientedKernel("scanMatch",
    90               {Binding{iBuilder->getStreamSetTy(dist + 1), "matchResults"}},
    91               {}, {}, {}, {Binding{iBuilder->getSizeTy(), "BlockNo"}}),
    92 mEditDistance(dist),
    93 mScanwordBitWidth(iBuilder->getSizeTy()->getBitWidth()) {
    94 
     88editdScanKernel::editdScanKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * matchResults) :
     89BlockOrientedKernel("editdScanMatch" + std::to_string(matchResults->getNumElements()),
     90              {Binding{"matchResults", matchResults}},
     91              {}, {}, {}, {Binding{b->getSizeTy(), "BlockNo"}}),
     92mNumElements(matchResults->getNumElements()),
     93mScanwordBitWidth(b->getSizeTy()->getBitWidth()) {
     94    addAttribute(SideEffecting());
    9595}
    9696
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.h

    r5440 r6184  
    1515class editdScanKernel : public BlockOrientedKernel {
    1616public:
    17     editdScanKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned dist);
     17    editdScanKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * matchResults);
    1818       
    1919private:
     
    2121    llvm::Function * generateScanWordRoutine(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const;
    2222       
    23     unsigned mEditDistance;
     23    unsigned mNumElements;
    2424    unsigned mScanwordBitWidth;
    2525};
Note: See TracChangeset for help on using the changeset viewer.