Changeset 5481 for icGREP


Ignore:
Timestamp:
May 30, 2017, 10:55:14 AM (2 years ago)
Author:
cameron
Message:

Refactoring grepEngine: separate out codepoint/property value grep

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5478 r5481  
    4747
    4848
    49 static re::CC * parsedCodePointSet = nullptr;
    50 
    51 static std::vector<std::string> parsedPropertyValues;
    52 
    5349size_t * startPoints = nullptr;
    5450size_t * accumBytes = nullptr;
     
    116112    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
    117113    return f(fileDescriptor, fileIdx);
    118 }
    119 
    120 void GrepEngine::doGrep(const char * buffer, const uint64_t length, const uint32_t fileIdx) const {
    121     assert (mGrepDriver);
    122     typedef uint64_t (*GrepFunctionType)(const char * buffer, const uint64_t length, const uint32_t fileIdx);
    123     auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
    124     f(buffer, length, fileIdx);
    125114}
    126115
     
    199188}
    200189
    201 const int MatchFoundReturnCode = 0;
    202 const int MatchNotFoundReturnCode = 1;
    203190void PrintResult(GrepModeType grepMode, std::vector<size_t> & total_CountOnly){
    204191    if (grepMode == NormalMode) {
    205         int returnCode = MatchNotFoundReturnCode;
     192        int returnCode = MatchNotFoundExitCode;
    206193        for (unsigned i = 0; i < inputFiles.size(); ++i){
    207194            std::cout << resultStrs[i].str();
    208             if (!resultStrs[i].str().empty()) returnCode = MatchFoundReturnCode;
     195            if (!resultStrs[i].str().empty()) returnCode = MatchFoundExitCode;
    209196        }
    210197        exit(returnCode);
     
    223210            };
    224211        }
    225         exit(total == 0 ? MatchNotFoundReturnCode : MatchFoundReturnCode);
     212        exit(total == 0 ? MatchNotFoundExitCode : MatchFoundExitCode);
    226213    }
    227214    else if (grepMode == FilesWithMatch || grepMode == FilesWithoutMatch ) {
     
    234221            total += total_CountOnly[i];
    235222        }
    236         exit(total == 0 ? MatchNotFoundReturnCode : MatchFoundReturnCode);
     223        exit(total == 0 ? MatchNotFoundExitCode : MatchFoundExitCode);
    237224    } else /* QuietMode */ {
    238225        for (unsigned i = 0; i < inputFiles.size(); ++i){
    239             if (total_CountOnly[i] > 0) exit(MatchFoundReturnCode);
    240         }
    241         exit(MatchNotFoundReturnCode);
    242     }
    243 }
     226            if (total_CountOnly[i] > 0) exit(MatchFoundExitCode);
     227        }
     228        exit(MatchNotFoundExitCode);
     229    }
     230}
     231
     232void GrepEngine::grepCodeGen_nvptx(std::vector<re::RE *> REs, const GrepModeType grepMode, const bool UTF_16) {
     233
     234    assert (mGrepDriver == nullptr);
     235
     236    mGrepDriver = new NVPTXDriver("engine");
     237    auto & idb = mGrepDriver->getBuilder();
     238    Module * M = idb->getModule();
     239
     240    const unsigned segmentSize = codegen::SegmentSize;
     241    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     242    const unsigned encodingBits = UTF_16 ? 16 : 8;
     243
     244    Type * const int64Ty = idb->getInt64Ty();
     245    Type * const int32Ty = idb->getInt32Ty();
     246    Type * const size_ty = idb->getSizeTy();
     247    Type * const sizeTyPtr = PointerType::get(size_ty, 1);
     248    Type * const int64tyPtr = PointerType::get(int64Ty, 1);
     249    Type * const voidTy = idb->getVoidTy();
     250
     251    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", voidTy, int64tyPtr, sizeTyPtr, sizeTyPtr, int64tyPtr, nullptr));
     252    mainFunc->setCallingConv(CallingConv::C);
     253    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     254    auto args = mainFunc->arg_begin();
     255
     256    Value * const inputPtr = &*(args++);
     257    inputPtr->setName("inputPtr");
     258    Value * const startPointsPtr = &*(args++);
     259    startPointsPtr->setName("startPointsPtr");
     260    Value * const bufferSizesPtr = &*(args++);
     261    bufferSizesPtr->setName("bufferSizesPtr");
     262    Value * const outputPtr = &*(args++);
     263    outputPtr->setName("outputPtr");
     264
     265    Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
     266    Value * tid = idb->CreateCall(tidFunc);
     267    Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32Ty, nullptr));
     268    Value * bid = idb->CreateCall(bidFunc);
     269
     270    Value * startPoint = idb->CreateLoad(idb->CreateGEP(startPointsPtr, bid));
     271    Value * startBlock = idb->CreateUDiv(startPoint, ConstantInt::get(int64Ty, idb->getBitBlockWidth()));
     272    Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(idb->getBitBlockType(), 8), 1), 1);   
     273    Value * inputStreamPtr = idb->CreateGEP(idb->CreateBitCast(inputPtr, inputStreamType), startBlock);
     274    Value * inputStream = idb->CreateGEP(inputStreamPtr, tid);
     275    Value * bufferSize = idb->CreateLoad(idb->CreateGEP(bufferSizesPtr, bid));
     276
     277    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8), 1));
     278    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, inputStreamType, segmentSize));
     279    sourceK->setInitialArguments({inputStream, bufferSize});
     280    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
     281
     282    StreamSetBuffer * BasisBits = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));
     283    kernel::Kernel * s2pk = mGrepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
     284    mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     285 
     286    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     287    kernel::Kernel * linebreakK = mGrepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
     288    mGrepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
     289   
     290    const auto n = REs.size();
     291
     292    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
     293
     294    for(unsigned i = 0; i < n; ++i){
     295        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     296        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
     297        mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
     298        MatchResultsBufs[i] = MatchResults;
     299    }
     300    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     301    if (REs.size() > 1) {
     302        MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     303        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
     304        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     305    }
     306
     307    kernel::MatchCount matchCountK(idb);
     308    mGrepDriver->addKernelCall(matchCountK, {MergedResults}, {});
     309    mGrepDriver->generatePipelineIR();
     310
     311    idb->setKernel(&matchCountK);
     312    Value * matchedLineCount = idb->getScalarField("matchedLineCount");
     313    matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
     314   
     315    Value * strideBlocks = ConstantInt::get(int32Ty, idb->getStride() / idb->getBitBlockWidth());
     316    Value * outputThreadPtr = idb->CreateGEP(outputPtr, idb->CreateAdd(idb->CreateMul(bid, strideBlocks), tid));
     317    idb->CreateStore(matchedLineCount, outputThreadPtr);
     318    idb->CreateRetVoid();
     319
     320    mGrepDriver->finalizeObject();
     321}
     322
     323void GrepEngine::grepCodeGen(std::vector<re::RE *> REs, const GrepModeType grepMode, const bool UTF_16, GrepSource grepSource) {
     324
     325    assert (mGrepDriver == nullptr);
     326    mGrepDriver = new ParabixDriver("engine");
     327    auto & idb = mGrepDriver->getBuilder();
     328    Module * M = idb->getModule();
     329
     330    const unsigned segmentSize = codegen::SegmentSize;
     331    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     332    const unsigned encodingBits = UTF_16 ? 16 : 8;
     333
     334    Type * const int64Ty = idb->getInt64Ty();
     335    Type * const int32Ty = idb->getInt32Ty();
     336
     337    Function * mainFunc = nullptr;
     338    Value * fileIdx = nullptr;
     339    StreamSetBuffer * ByteStream = nullptr;
     340    kernel::Kernel * sourceK = nullptr;
     341   
     342    size_t MatchLimit = ((grepMode == QuietMode) | (grepMode == FilesWithMatch) | (grepMode == FilesWithoutMatch)) ? 1 : MaxCountFlag;
     343
     344    if (grepSource == GrepSource::Internal) {
     345
     346        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, idb->getInt8PtrTy(), int64Ty, int32Ty, nullptr));
     347        mainFunc->setCallingConv(CallingConv::C);
     348        idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     349        auto args = mainFunc->arg_begin();
     350
     351        Value * const buffer = &*(args++);
     352        buffer->setName("buffer");
     353
     354        Value * length = &*(args++);
     355        length->setName("length");
     356        length = idb->CreateZExtOrTrunc(length, idb->getSizeTy());
     357
     358        fileIdx = &*(args++);
     359        fileIdx->setName("fileIdx");
     360
     361        ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
     362
     363        sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
     364        sourceK->setInitialArguments({buffer, length});
     365
     366    } else {
     367
     368        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, idb->getInt32Ty(), int32Ty, nullptr));
     369        mainFunc->setCallingConv(CallingConv::C);
     370        idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     371        auto args = mainFunc->arg_begin();
     372
     373        Value * const fileDescriptor = &*(args++);
     374        fileDescriptor->setName("fileDescriptor");
     375        fileIdx = &*(args++);
     376        fileIdx->setName("fileIdx");
     377
     378        ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
     379
     380        if (grepSource == GrepSource::File) {
     381            sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MMapSourceKernel>(idb, segmentSize));
     382            sourceK->setInitialArguments({fileDescriptor});
     383        } else { // if (grepSource == GrepSource::StdIn) {
     384            sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::ReadSourceKernel>(idb, segmentSize));
     385            sourceK->setInitialArguments({idb->getInt32(STDIN_FILENO)});
     386        }
     387    }
     388
     389    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
     390    StreamSetBuffer * BasisBits = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));
     391   
     392    kernel::Kernel * s2pk = mGrepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
     393    mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     394   
     395    kernel::Kernel * linebreakK = mGrepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
     396    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     397    mGrepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
     398   
     399    const auto n = REs.size();
     400
     401    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
     402
     403    for(unsigned i = 0; i < n; ++i){
     404        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     405        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
     406        mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
     407        MatchResultsBufs[i] = MatchResults;
     408    }
     409    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     410    if (REs.size() > 1) {
     411        MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     412        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
     413        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     414    }
     415   
     416    if (InvertMatchFlag) {
     417        kernel::Kernel * invertK = mGrepDriver->addKernelInstance(make_unique<kernel::InvertMatchesKernel>(idb));
     418        StreamSetBuffer * OriginalMatches = MergedResults;
     419        MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     420        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {MergedResults});
     421    }
     422    if (MatchLimit > 0) {
     423        kernel::Kernel * untilK = mGrepDriver->addKernelInstance(make_unique<kernel::UntilNkernel>(idb));
     424        untilK->setInitialArguments({idb->getSize(MatchLimit)});
     425        StreamSetBuffer * AllMatches = MergedResults;
     426        MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     427        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {MergedResults});
     428    }
     429    if (grepMode != NormalMode) {
     430        kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance(make_unique<kernel::MatchCount>(idb));
     431        mGrepDriver->makeKernelCall(matchCountK, {MergedResults}, {});
     432        mGrepDriver->generatePipelineIR();
     433        idb->setKernel(matchCountK);
     434        Value * matchedLineCount = idb->getScalarField("matchedLineCount");
     435        matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
     436        idb->CreateRet(matchedLineCount);
     437    } else {
     438        kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::Normal, encodingBits));
     439        scanMatchK->setInitialArguments({fileIdx});
     440        mGrepDriver->makeKernelCall(scanMatchK, {MergedResults, LineBreakStream, ByteStream}, {});
     441        if (UTF_16) {
     442            mGrepDriver->LinkFunction(*scanMatchK, "matcher", &wrapped_report_match<uint16_t>);
     443        } else {
     444            mGrepDriver->LinkFunction(*scanMatchK, "matcher", &wrapped_report_match<uint8_t>);
     445        }
     446        mGrepDriver->generatePipelineIR();
     447        idb->CreateRet(idb->getInt64(0));
     448    }
     449    mGrepDriver->finalizeObject();
     450}
     451
     452GrepEngine::GrepEngine()
     453: mGrepDriver(nullptr) {
     454
     455}
     456
     457GrepEngine::~GrepEngine() {
     458    delete mGrepDriver;
     459}
     460
     461
     462   
     // File-local result set for grepCodepoints(): that function replaces it with a fresh CC
     // before running its pipeline and returns it afterwards.
     // NOTE(review): presumably filled in by the insert_codepoints matcher callback, whose body
     // is not visible here; confirm.
     463static re::CC * parsedCodePointSet = nullptr;
    244464
    245465void insert_codepoints(const size_t lineNum, const size_t line_start, const size_t line_end, const char * const buffer) {
     
    262482}
    263483
     484re::CC * grepCodepoints(re::RE * pattern, char * UnicodeDataBuffer, size_t bufferLength) {
     485    parsedCodePointSet = re::makeCC();       
     486    const unsigned segmentSize = 8;
     487   
     488    ParabixDriver pxDriver("codepointEngine");
     489    auto & idb = pxDriver.getBuilder();
     490    Module * M = idb->getModule();
     491   
     492    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
     493    mainFunc->setCallingConv(CallingConv::C);
     494    auto args = mainFunc->arg_begin();
     495    Value * const buffer = &*(args++);
     496    buffer->setName("buffer");
     497    Value * length = &*(args++);
     498    length->setName("length");
     499   
     500    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     501   
     502    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
     503    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
     504    sourceK->setInitialArguments({buffer, length});
     505    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
     506   
     507    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize));
     508   
     509    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
     510    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     511   
     512    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, 8));
     513    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
     514    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
     515   
     516    StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
     517    kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, pattern));
     518    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
     519   
     520    kernel::Kernel * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::NameExpression, 8));
     521    scanMatchK->setInitialArguments({idb->getInt32(0)});
     522    pxDriver.makeKernelCall(scanMatchK, {MatchResults, LineBreakStream, ByteStream}, {});
     523    pxDriver.LinkFunction(*scanMatchK, "matcher", &insert_codepoints);
     524    pxDriver.generatePipelineIR();
     525    idb->CreateRetVoid();
     526    pxDriver.finalizeObject();
     527   
     528    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
     529    auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
     530    f(UnicodeDataBuffer, bufferLength);
     531   
     532    return parsedCodePointSet;   
     533}
     534
     535   
     // File-local list of property value strings.
     // NOTE(review): presumably appended to by the insert_property_values matcher callback
     // declared just below; its body is not visible here, so confirm before relying on this.
     536static std::vector<std::string> parsedPropertyValues;
     537
    264538void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
    265539    assert (line_start <= line_end);
     
    267541}
    268542
    269 void GrepEngine::grepCodeGen_nvptx(std::vector<re::RE *> REs, const GrepModeType grepMode, const bool UTF_16) {
    270 
    271     assert (mGrepDriver == nullptr);
    272 
    273     mGrepDriver = new NVPTXDriver("engine");
    274     auto & idb = mGrepDriver->getBuilder();
    275     Module * M = idb->getModule();
    276 
    277     const unsigned segmentSize = codegen::SegmentSize;
    278     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    279     const unsigned encodingBits = UTF_16 ? 16 : 8;
    280 
    281     Type * const int64Ty = idb->getInt64Ty();
    282     Type * const int32Ty = idb->getInt32Ty();
    283     Type * const size_ty = idb->getSizeTy();
    284     Type * const sizeTyPtr = PointerType::get(size_ty, 1);
    285     Type * const int64tyPtr = PointerType::get(int64Ty, 1);
    286     Type * const voidTy = idb->getVoidTy();
    287 
    288     Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", voidTy, int64tyPtr, sizeTyPtr, sizeTyPtr, int64tyPtr, nullptr));
    289     mainFunc->setCallingConv(CallingConv::C);
    290     idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    291     auto args = mainFunc->arg_begin();
    292 
    293     Value * const inputPtr = &*(args++);
    294     inputPtr->setName("inputPtr");
    295     Value * const startPointsPtr = &*(args++);
    296     startPointsPtr->setName("startPointsPtr");
    297     Value * const bufferSizesPtr = &*(args++);
    298     bufferSizesPtr->setName("bufferSizesPtr");
    299     Value * const outputPtr = &*(args++);
    300     outputPtr->setName("outputPtr");
    301 
    302     Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
    303     Value * tid = idb->CreateCall(tidFunc);
    304     Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32Ty, nullptr));
    305     Value * bid = idb->CreateCall(bidFunc);
    306 
    307     Value * startPoint = idb->CreateLoad(idb->CreateGEP(startPointsPtr, bid));
    308     Value * startBlock = idb->CreateUDiv(startPoint, ConstantInt::get(int64Ty, idb->getBitBlockWidth()));
    309     Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(idb->getBitBlockType(), 8), 1), 1);   
    310     Value * inputStreamPtr = idb->CreateGEP(idb->CreateBitCast(inputPtr, inputStreamType), startBlock);
    311     Value * inputStream = idb->CreateGEP(inputStreamPtr, tid);
    312     Value * bufferSize = idb->CreateLoad(idb->CreateGEP(bufferSizesPtr, bid));
    313 
    314     StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8), 1));
    315     kernel::Kernel * sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, inputStreamType, segmentSize));
    316     sourceK->setInitialArguments({inputStream, bufferSize});
    317     mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
    318 
    319     StreamSetBuffer * BasisBits = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));
    320     kernel::Kernel * s2pk = mGrepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
    321     mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    322  
    323     StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    324     kernel::Kernel * linebreakK = mGrepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
    325     mGrepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
    326    
    327     const auto n = REs.size();
    328 
    329     std::vector<StreamSetBuffer *> MatchResultsBufs(n);
    330 
    331     for(unsigned i = 0; i < n; ++i){
    332         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    333         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
    334         mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
    335         MatchResultsBufs[i] = MatchResults;
    336     }
    337     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    338     if (REs.size() > 1) {
    339         MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    340         kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
    341         mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
    342     }
    343 
    344     kernel::MatchCount matchCountK(idb);
    345     mGrepDriver->addKernelCall(matchCountK, {MergedResults}, {});
    346     mGrepDriver->generatePipelineIR();
    347 
    348     idb->setKernel(&matchCountK);
    349     Value * matchedLineCount = idb->getScalarField("matchedLineCount");
    350     matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
    351    
    352     Value * strideBlocks = ConstantInt::get(int32Ty, idb->getStride() / idb->getBitBlockWidth());
    353     Value * outputThreadPtr = idb->CreateGEP(outputPtr, idb->CreateAdd(idb->CreateMul(bid, strideBlocks), tid));
    354     idb->CreateStore(matchedLineCount, outputThreadPtr);
    355     idb->CreateRetVoid();
    356 
    357     mGrepDriver->finalizeObject();
    358 }
    359 
    360 void GrepEngine::grepCodeGen(std::vector<re::RE *> REs, const GrepModeType grepMode, const bool UTF_16, GrepSource grepSource, const GrepType grepType) {
    361 
    362     assert (mGrepDriver == nullptr);
    363     mGrepDriver = new ParabixDriver("engine");
    364     auto & idb = mGrepDriver->getBuilder();
    365     Module * M = idb->getModule();
    366 
    367     const unsigned segmentSize = codegen::SegmentSize;
    368     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    369     const unsigned encodingBits = UTF_16 ? 16 : 8;
    370 
    371     Type * const int64Ty = idb->getInt64Ty();
    372     Type * const int32Ty = idb->getInt32Ty();
    373 
    374     Function * mainFunc = nullptr;
    375     Value * fileIdx = nullptr;
    376     StreamSetBuffer * ByteStream = nullptr;
    377     kernel::Kernel * sourceK = nullptr;
    378    
    379     size_t MatchLimit = ((grepMode == QuietMode) | (grepMode == FilesWithMatch) | (grepMode == FilesWithoutMatch)) ? 1 : MaxCountFlag;
    380 
    381     if (grepSource == GrepSource::Internal) {
    382 
    383         mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, idb->getInt8PtrTy(), int64Ty, int32Ty, nullptr));
    384         mainFunc->setCallingConv(CallingConv::C);
    385         idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    386         auto args = mainFunc->arg_begin();
    387 
    388         Value * const buffer = &*(args++);
    389         buffer->setName("buffer");
    390 
    391         Value * length = &*(args++);
    392         length->setName("length");
    393         length = idb->CreateZExtOrTrunc(length, idb->getSizeTy());
    394 
    395         fileIdx = &*(args++);
    396         fileIdx->setName("fileIdx");
    397 
    398         ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
    399 
    400         sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
    401         sourceK->setInitialArguments({buffer, length});
    402 
    403     } else {
    404 
    405         mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, idb->getInt32Ty(), int32Ty, nullptr));
    406         mainFunc->setCallingConv(CallingConv::C);
    407         idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    408         auto args = mainFunc->arg_begin();
    409 
    410         Value * const fileDescriptor = &*(args++);
    411         fileDescriptor->setName("fileDescriptor");
    412         fileIdx = &*(args++);
    413         fileIdx->setName("fileIdx");
    414 
    415         ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
    416 
    417         if (grepSource == GrepSource::File) {
    418             sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MMapSourceKernel>(idb, segmentSize));
    419             sourceK->setInitialArguments({fileDescriptor});
    420         } else { // if (grepSource == GrepSource::StdIn) {
    421             sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::ReadSourceKernel>(idb, segmentSize));
    422             sourceK->setInitialArguments({idb->getInt32(STDIN_FILENO)});
    423         }
    424     }
    425 
    426     mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
    427     StreamSetBuffer * BasisBits = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));
    428    
    429     kernel::Kernel * s2pk = mGrepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
    430     mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    431    
    432     kernel::Kernel * linebreakK = mGrepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
    433     StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    434     mGrepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
    435    
    436     const auto n = REs.size();
    437 
    438     std::vector<StreamSetBuffer *> MatchResultsBufs(n);
    439 
    440     for(unsigned i = 0; i < n; ++i){
    441         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    442         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
    443         mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
    444         MatchResultsBufs[i] = MatchResults;
    445     }
    446     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    447     if (REs.size() > 1) {
    448         MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    449         kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
    450         mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
    451     }
    452    
    453     if (InvertMatchFlag) {
    454         kernel::Kernel * invertK = mGrepDriver->addKernelInstance(make_unique<kernel::InvertMatchesKernel>(idb));
    455         StreamSetBuffer * OriginalMatches = MergedResults;
    456         MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    457         mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {MergedResults});
    458     }
    459     if (MatchLimit > 0) {
    460         kernel::Kernel * untilK = mGrepDriver->addKernelInstance(make_unique<kernel::UntilNkernel>(idb));
    461         untilK->setInitialArguments({idb->getSize(MatchLimit)});
    462         StreamSetBuffer * AllMatches = MergedResults;
    463         MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    464         mGrepDriver->makeKernelCall(untilK, {AllMatches}, {MergedResults});
    465     }
    466     if (grepMode != NormalMode) {
    467         kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance(make_unique<kernel::MatchCount>(idb));
    468         mGrepDriver->makeKernelCall(matchCountK, {MergedResults}, {});
    469         mGrepDriver->generatePipelineIR();
    470         idb->setKernel(matchCountK);
    471         Value * matchedLineCount = idb->getScalarField("matchedLineCount");
    472         matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
    473         idb->CreateRet(matchedLineCount);
    474     } else {
    475         kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, grepType, encodingBits));
    476         scanMatchK->setInitialArguments({fileIdx});
    477         mGrepDriver->makeKernelCall(scanMatchK, {MergedResults, LineBreakStream, ByteStream}, {});
    478         switch (grepType) {
    479             case GrepType::Normal:
    480                 if (UTF_16) {
    481                     mGrepDriver->LinkFunction(*scanMatchK, "matcher", &wrapped_report_match<uint16_t>);
    482                 } else {
    483                     mGrepDriver->LinkFunction(*scanMatchK, "matcher", &wrapped_report_match<uint8_t>);
    484                 }
    485                 break;
    486             case GrepType::NameExpression:
    487                 mGrepDriver->LinkFunction(*scanMatchK, "matcher", &insert_codepoints);
    488                 break;
    489             case GrepType::PropertyValue:
    490                 mGrepDriver->LinkFunction(*scanMatchK, "matcher", &insert_property_values);
    491                 break;
    492         }
    493         mGrepDriver->generatePipelineIR();
    494         idb->CreateRet(idb->getInt64(0));
    495     }
    496     mGrepDriver->finalizeObject();
    497 }
    498 
    499 re::CC * GrepEngine::grepCodepoints() {
    500     parsedCodePointSet = re::makeCC();
    501     char * mFileBuffer = getUnicodeNameDataPtr();
    502     size_t mFileSize = getUnicodeNameDataSize();
    503     doGrep(mFileBuffer, mFileSize, 0);
    504     return parsedCodePointSet;
    505 }
    506 
    507 const std::vector<std::string> & GrepEngine::grepPropertyValues(const std::string& propertyName) {
     543
     544const std::vector<std::string> & grepPropertyValues(const std::string& propertyName, re::RE * propertyValuePattern) {
    508545    enum { MaxSupportedVectorWidthInBytes = 32 };
    509546    AlignedAllocator<char, MaxSupportedVectorWidthInBytes> alloc;
     
    516553    std::memcpy(aligned, str.data(), n);
    517554    std::memset(aligned + n, 0, MaxSupportedVectorWidthInBytes);
    518     doGrep(aligned, n, 0);
     555   
     556    const unsigned segmentSize = 8;
     557   
     558    ParabixDriver pxDriver("propertyValueEngine");
     559    auto & idb = pxDriver.getBuilder();
     560    Module * M = idb->getModule();
     561   
     562    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
     563    mainFunc->setCallingConv(CallingConv::C);
     564    auto args = mainFunc->arg_begin();
     565    Value * const buffer = &*(args++);
     566    buffer->setName("buffer");
     567    Value * length = &*(args++);
     568    length->setName("length");
     569   
     570    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     571   
     572    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
     573    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
     574    sourceK->setInitialArguments({buffer, length});
     575    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
     576   
     577    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize));
     578   
     579    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
     580    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     581   
     582    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, 8));
     583    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
     584    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
     585   
     586    StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
     587    kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, propertyValuePattern));
     588    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
     589   
     590    kernel::Kernel * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::PropertyValue, 8));
     591    scanMatchK->setInitialArguments({idb->getInt32(0)});
     592    pxDriver.makeKernelCall(scanMatchK, {MatchResults, LineBreakStream, ByteStream}, {});
     593    pxDriver.LinkFunction(*scanMatchK, "matcher", &insert_property_values);
     594    pxDriver.generatePipelineIR();
     595    idb->CreateRetVoid();
     596    pxDriver.finalizeObject();
     597   
     598    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
     599    auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
     600    f(aligned, n);
     601   
    519602    alloc.deallocate(aligned, 0);
    520603    return parsedPropertyValues;
    521604}
    522605
    523 GrepEngine::GrepEngine()
    524 : mGrepDriver(nullptr) {
    525 
    526 }
    527 
    528 GrepEngine::~GrepEngine() {
    529     delete mGrepDriver;
    530 }
    531 
    532 }
     606   
     607}
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5476 r5481  
    2828    ~GrepEngine();
    2929
    30     void grepCodeGen(std::vector<re::RE *> REs, GrepModeType grepMode, bool UTF_16, GrepSource grepSource, GrepType grepType = GrepType::Normal);
     30    void grepCodeGen(std::vector<re::RE *> REs, GrepModeType grepMode, bool UTF_16, GrepSource grepSource);
    3131
    3232    void grepCodeGen_nvptx(std::vector<re::RE *> REs, GrepModeType grepMode, bool UTF_16);
     
    3737
    3838    uint64_t doGrep(const int32_t fileDescriptor, const uint32_t fileIdx) const;
    39    
    40     void doGrep(const char * buffer, const uint64_t length, const uint32_t fileIdx) const;
    41 
    42     re::CC * grepCodepoints();
    43 
    44     const std::vector<std::string> & grepPropertyValues(const std::string & propertyName);
    45    
     39       
    4640private:
    4741   
     
    4943};
    5044
     45void initFileResult(std::vector<std::string> filenames);
     46   
     47void PrintResult(GrepModeType grepMode, std::vector<size_t> & total_CountOnly);
     48   
    5149
    52 re::CC * getParsedCodePointSet();
    53 void setParsedCodePointSet();
     50re::CC * grepCodepoints(re::RE * pattern, char * UnicodeDataBuffer, size_t bufferLength);
     51   
     52const std::vector<std::string> & grepPropertyValues(const std::string& propertyName, re::RE * propertyValuePattern);
    5453
    55 void setParsedPropertyValues();
    56 
    57 void initFileResult(std::vector<std::string> filenames);
    58 void PrintResult(GrepModeType grepMode, std::vector<size_t> & total_CountOnly);
    5954}
    6055
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5473 r5481  
    2222#include <re/re_assertion.h>
    2323#include <re/printer_re.h>
     24#include <UCD/UnicodeNameData.h>
    2425#include <UCD/resolve_properties.h>
    2526#include <UCD/CaseFolding_txt.h>
     
    643644RE * RE_Parser::parseRegexPropertyValue(const std::string & propName, const std::string& regexValue) {
    644645    RE * propValueRe = RE_Parser::parse("^" + regexValue + "$", fModeFlagSet, mReSyntax);
    645     grep::GrepEngine engine;
    646     engine.grepCodeGen({ propValueRe }, grep::NormalMode, false, GrepSource::Internal, GrepType::PropertyValue);
    647     const auto matches = engine.grepPropertyValues(propName);
     646    const auto matches = grep::grepPropertyValues(propName, propValueRe);
    648647    if (matches.empty()) {
    649648        ParseFailure("regex " + regexValue + " match no property values");
     
    676675    RE * embedded = makeSeq({mMemoizer.memoize(makeCC(0x3B)), makeRep(makeAny(), 0, Rep::UNBOUNDED_REP), nameRE});
    677676   
    678     grep::GrepEngine engine;
    679     engine.grepCodeGen({ embedded }, grep::NormalMode, false, GrepSource::Internal, GrepType::NameExpression);
    680     CC * codepoints = engine.grepCodepoints();
     677    CC * codepoints = grep::grepCodepoints(embedded, getUnicodeNameDataPtr(), getUnicodeNameDataSize());
    681678   
    682679    if (codepoints) {
Note: See TracChangeset for help on using the changeset viewer.