Ignore:
Timestamp:
Mar 7, 2017, 2:09:25 PM (2 years ago)
Author:
xuedongx
Message:

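Delete the line-break computation from the icgrep regular-expression compiler; the line-break stream is now produced by a separate LineBreak kernel and passed to the icgrep kernel as a "linebreak" input.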

Location:
icGREP/icgrep-devel/icgrep
Files:
2 deleted
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5343 r5357  
    7676target_link_libraries (RegExpCompiler RegExpADT)
    7777
    78 add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/unicode_linebreak_kernel.cpp kernels/streams_merge.cpp kernels/match_count.cpp)
     78add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/match_count.cpp)
    7979add_executable(u8u16 u8u16.cpp toolchain.cpp)
    8080add_executable(base64 base64.cpp kernels/radix64.cpp toolchain.cpp)
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5353 r5357  
    1717#include <UCD/resolve_properties.h>
    1818#include <kernels/cc_kernel.h>
    19 #include <kernels/unicode_linebreak_kernel.h>
     19#include <kernels/linebreak_kernel.h>
    2020#include <kernels/streams_merge.h>
    2121#include <kernels/match_count.h>
     
    4040#include <util/aligned_allocator.h>
    4141
    42 #define UNICODE_LINE_BREAK (!re::AlgorithmOptionIsSet(re::DisableUnicodeLineBreak))
    4342
    4443using namespace parabix;
     
    295294    kernel::S2PKernel  s2pk(iBuilder);
    296295    s2pk.generateKernel({&ByteStream}, {&BasisBits});
    297 
    298     std::vector<re::CC *> LF;
    299     LF.push_back(re::makeCC(0x0A));
    300    
    301     kernel::UnicodeLineBreakKernelBuilder unicodelbK(iBuilder, "unicodelinebreak", encodingBits);
    302     kernel::ParabixCharacterClassKernelBuilder linefeedK(iBuilder, "linefeed", LF, encodingBits);
    303 
     296   
    304297    std::vector<pablo::PabloKernel *> icgrepKs;
    305298    std::vector<StreamSetBuffer *> MatchResultsBufs;
    306299
    307300    for(unsigned i=0; i<REs.size(); i++){   
    308         pablo::PabloKernel * icgrepK = new pablo::PabloKernel(iBuilder, "icgrep"+std::to_string(i), {Binding{iBuilder->getStreamSetTy(8), "basis"}});
     301        pablo::PabloKernel * icgrepK = new pablo::PabloKernel(iBuilder, "icgrep"+std::to_string(i), {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
    309302        re::re2pablo_compiler(icgrepK, re::regular_expression_passes(REs[i]), false);
    310303        pablo_function_passes(icgrepK);
     
    325318    streamsMergeK.generateKernel(MatchResultsBufs, {&mergedResults});
    326319
     320    kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
     321    CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     322    LineBreakStream.allocateBuffer();
     323    linebreakK.generateKernel({&BasisBits}, {&LineBreakStream});
     324   
     325    KernelList.push_back(&linebreakK);
    327326    for(unsigned i=0; i<REs.size(); i++){
    328         icgrepKs[i]->generateKernel({&BasisBits}, {MatchResultsBufs[i]});
     327        icgrepKs[i]->generateKernel({&BasisBits, &LineBreakStream}, {MatchResultsBufs[i]});
    329328        KernelList.push_back(icgrepKs[i]);
    330329    }
     
    348347    }
    349348    else{
    350         pablo::PabloKernel *linebreakK = UNICODE_LINE_BREAK ? &cast<pablo::PabloKernel>(unicodelbK) :  &cast<pablo::PabloKernel>(linefeedK);
    351         CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    352         LineBreakStream.allocateBuffer();
    353         linebreakK->generateKernel({&BasisBits}, {&LineBreakStream});
    354 
    355349        kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType);
    356350        scanMatchK.generateKernel({&mergedResults, &LineBreakStream}, {});               
    357351        scanMatchK.setInitialArguments({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
    358352
    359         KernelList.push_back(linebreakK);
    360353        KernelList.push_back(&scanMatchK);
    361354
     
    491484    s2pk.generateKernel({&ByteStream}, {&BasisBits});
    492485   
    493     std::vector<re::CC *> LF;
    494     LF.push_back(re::makeCC(0x0A));
    495    
    496     kernel::UnicodeLineBreakKernelBuilder unicodelbK(iBuilder, "unicodelinebreak", encodingBits);
    497     kernel::ParabixCharacterClassKernelBuilder linefeedK(iBuilder, "linefeed", LF, encodingBits);
    498 
    499     pablo::PabloKernel *linebreakK = UNICODE_LINE_BREAK ? &cast<pablo::PabloKernel>(unicodelbK) :  &cast<pablo::PabloKernel>(linefeedK);
     486    kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
    500487    CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     488
     489    linebreakK.generateKernel({&BasisBits}, {&LineBreakStream});
    501490    LineBreakStream.allocateBuffer();
    502     linebreakK->generateKernel({&BasisBits}, {&LineBreakStream});
    503 
    504     pablo::PabloKernel icgrepK(iBuilder, "icgrep", {Binding{iBuilder->getStreamSetTy(8), "basis"}});
     491
     492    pablo::PabloKernel icgrepK(iBuilder, "icgrep", {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
    505493    re::re2pablo_compiler(&icgrepK, re::regular_expression_passes(re_ast), CountOnly);
    506494    pablo_function_passes(&icgrepK);
     
    510498
    511499    if (CountOnly) {
    512         icgrepK.generateKernel({&BasisBits}, {});
     500        icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {});
    513501        if (pipelineParallel) {
    514             generatePipelineParallel(iBuilder, {&mmapK, &s2pk, &icgrepK});
     502            generatePipelineParallel(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK});
    515503        } else if (segmentPipelineParallel) {
    516             generateSegmentParallelPipeline(iBuilder, {&mmapK, &s2pk, &icgrepK});
     504            generateSegmentParallelPipeline(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK});
    517505        } else {
    518             generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &icgrepK});
     506            generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK});
    519507        }
    520508        iBuilder->CreateRet(icgrepK.createGetAccumulatorCall(icgrepK.getInstance(), "matchedLineCount"));
     
    525513            MatchResults.setStreamSetBuffer(outputStream, fileSize);
    526514
    527             icgrepK.generateKernel({&BasisBits},  {&MatchResults});
    528             generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &icgrepK});
     515            icgrepK.generateKernel({&BasisBits, &LineBreakStream},  {&MatchResults});
     516            generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK});
    529517
    530518        }
     
    534522            MatchResults.allocateBuffer();
    535523
    536             icgrepK.generateKernel({&BasisBits}, {&MatchResults});
     524            icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {&MatchResults});
    537525
    538526            kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType);
     
    541529           
    542530            if (pipelineParallel) {
    543                 generatePipelineParallel(iBuilder, {&mmapK, &s2pk, &icgrepK, linebreakK, &scanMatchK});
     531                generatePipelineParallel(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK, &scanMatchK});
    544532            } else if (segmentPipelineParallel) {
    545                 generateSegmentParallelPipeline(iBuilder, {&mmapK, &s2pk, &icgrepK, linebreakK, &scanMatchK});
     533                generateSegmentParallelPipeline(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK, &scanMatchK});
    546534            } else {
    547                 generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &icgrepK, linebreakK, &scanMatchK});
     535                generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK, &scanMatchK});
    548536            }
    549537        }
  • icGREP/icgrep-devel/icgrep/preprocess.cpp

    r5341 r5357  
    1616#include <IR_Gen/idisa_builder.h>
    1717#include <IR_Gen/idisa_target.h>
     18#include <kernels/linebreak_kernel.h>
    1819#include <kernels/streamset.h>
    1920#include <kernels/mmap_kernel.h>
     
    7778    mmapK.setInitialArguments({fileSize});
    7879
    79     std::vector<re::CC *> charClasses;
    80     charClasses.push_back(re::makeCC(0x0A));
    81     kernel::DirectCharacterClassKernelBuilder linefeedK(iBuilder, "linefeed", charClasses, 1);
    82     linefeedK.generateKernel({&ByteStream}, {&MatchResults});
     80    kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
     81    linebreakK.generateKernel({&ByteStream}, {&MatchResults});
    8382   
    8483    kernel::CCScanKernel scanMatchK(iBuilder, 1);
    8584    scanMatchK.generateKernel({&MatchResults}, {});
    8685   
    87     generatePipelineLoop(iBuilder, {&mmapK, &linefeedK, &scanMatchK});
     86    generatePipelineLoop(iBuilder, {&mmapK, &linebreakK, &scanMatchK});
    8887    iBuilder->CreateRetVoid();
    8988
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5333 r5357  
    4141namespace re {
    4242
    43 void RE_Compiler::initializeRequiredStreams(const unsigned encodingBits) {
     43void RE_Compiler::initializeRequiredStreams(const unsigned encodingBits, Var * linebreak) {
    4444    if (encodingBits == 8) {
    45         RE_Compiler::initializeRequiredStreams_utf8();
     45        RE_Compiler::initializeRequiredStreams_utf8(linebreak);
    4646    } else if (encodingBits == 16) {
    47         RE_Compiler::initializeRequiredStreams_utf16();
    48     }
    49 }
    50 
    51 void RE_Compiler::initializeRequiredStreams_utf16() {
     47        RE_Compiler::initializeRequiredStreams_utf16(linebreak);
     48    }
     49}
     50
     51void RE_Compiler::initializeRequiredStreams_utf16(Var * linebreak) {
    5252    PabloAST * LF = mCCCompiler.compileCC("LF", makeCC(0x000A), mPB);
    5353    PabloAST * CR = mCCCompiler.compileCC("CR", makeCC(0x000D), mPB);
    54     PabloAST * LF_VT_FF_CR = mCCCompiler.compileCC(makeCC(0x000A, 0x000D));
    55     PabloAST * NEL = mCCCompiler.compileCC("NEL", makeCC(0x0085), mPB);
    56     PabloAST * LS_PS = mCCCompiler.compileCC("LS_PS", makeCC(0x2028, 0x2029), mPB);
    57     PabloAST * NEL_LS_PS = mPB.createOr(NEL, LS_PS, "NEL_LS_PS");
    58 
    5954    PabloAST * cr1 = mPB.createAdvance(CR, 1, "cr1");
    6055    mCRLF = mPB.createAnd(cr1, LF, "crlf");
     
    7772    mInitial = mPB.createOr(u16single, hi_surrogate, "initial");
    7873   
    79     PabloAST * LB_chars = mPB.createOr(LF_VT_FF_CR, NEL_LS_PS);
    80     PabloAST * UnicodeLineBreak = mPB.createAnd(LB_chars, mPB.createNot(mCRLF));  // count the CR, but not CRLF
    81     PabloAST * lb = UNICODE_LINE_BREAK ? UnicodeLineBreak : LF;
    82     PabloAST * unterminatedLineAtEOF = mPB.createAtEOF(mPB.createAdvance(mPB.createNot(LB_chars), 1));
    83     mLineBreak = mPB.createOr(lb, unterminatedLineAtEOF);
    84     mAny = mPB.createNot(lb, "any");
    85 }
    86 void RE_Compiler::initializeRequiredStreams_utf8() {
     74    mLineBreak = mPB.createExtract(linebreak, mPB.getInteger(0));
     75    mAny = mPB.createNot(mLineBreak, "any");
     76}
     77
     78void RE_Compiler::initializeRequiredStreams_utf8(Var * linebreak) {
    8779    PabloAST * LF = mCCCompiler.compileCC("LF", makeCC(0x0A), mPB);
    8880    PabloAST * CR = mCCCompiler.compileCC(makeCC(0x0D));
    89     PabloAST * LF_VT_FF_CR = mCCCompiler.compileCC(makeCC(0x0A, 0x0D));
    9081
    9182    Zeroes * const zero = mPB.createZeroes();
     
    10192    Var * valid_pfx = mPB.createVar("valid_pfx", zero);
    10293    Var * nonFinal = mPB.createVar("nonfinal", zero);
    103     Var * NEL_LS_PS = mPB.createVar("NEL_LS_PS", zero);
    10494
    10595    PabloAST * u8pfx = mCCCompiler.compileCC(makeCC(0xC0, 0xFF));
     
    117107    // Two-byte sequences
    118108    Var * u8scope22 = it.createVar("u8scope22", zero);
    119     Var * NEL = it.createVar("NEL", zero);
    120109    PabloBuilder it2 = PabloBuilder::Create(it);
    121110    it2.createAssign(u8scope22, it2.createAdvance(u8pfx2, 1));
    122     it2.createAssign(NEL, it2.createAnd(it2.createAdvance(mCCCompiler.compileCC(makeCC(0xC2), it2), 1), mCCCompiler.compileCC(makeCC(0x85), it2)));
    123111    it.createIf(u8pfx2, it2);
    124112
     
    127115    Var * u8scope32 = it.createVar("u8scope32", zero);
    128116    Var * u8scope3X = it.createVar("u8scope3X", zero);
    129     Var * LS_PS = it.createVar("LS_PS", zero);
    130117    Var * EX_invalid = it.createVar("EX_invalid", zero);
    131 
    132118    PabloBuilder it3 = PabloBuilder::Create(it);
    133119    it.createIf(u8pfx3, it3);
    134 
    135120    it3.createAssign(u8scope32, it3.createAdvance(u8pfx3, 1));
    136121    PabloAST * u8scope33 = it3.createAdvance(u8pfx3, 2);
    137122    it3.createAssign(u8scope3X, it3.createOr(u8scope32, u8scope33));
    138     PabloAST * E2_80 = it3.createAnd(it3.createAdvance(mCCCompiler.compileCC(makeCC(0xE2), it3), 1), mCCCompiler.compileCC(makeCC(0x80), it3));
    139     it3.createAssign(LS_PS, it3.createAnd(it3.createAdvance(E2_80, 1), mCCCompiler.compileCC(makeCC(0xA8,0xA9), it3)));
    140123    PabloAST * E0_invalid = it3.createAnd(it3.createAdvance(mCCCompiler.compileCC(makeCC(0xE0), it3), 1), mCCCompiler.compileCC(makeCC(0x80, 0x9F), it3));
    141124    PabloAST * ED_invalid = it3.createAnd(it3.createAdvance(mCCCompiler.compileCC(makeCC(0xED), it3), 1), mCCCompiler.compileCC(makeCC(0xA0, 0xBF), it3));
    142125    it3.createAssign(EX_invalid, it3.createOr(E0_invalid, ED_invalid));
    143 
    144     it.createAssign(NEL_LS_PS, it.createOr(NEL, LS_PS));
    145126
    146127    //
     
    179160
    180161
    181     PabloAST * LB_chars = mPB.createOr(LF_VT_FF_CR, NEL_LS_PS);
    182162    PabloAST * u8single = mPB.createAnd(mCCCompiler.compileCC(makeCC(0x00, 0x7F)), mPB.createNot(u8invalid));
    183163    mInitial = mPB.createOr(u8single, valid_pfx, "initial");
    184164    mFinal = mPB.createNot(mPB.createOr(mNonFinal, u8invalid), "final");
    185     PabloAST * UnicodeLineBreak = mPB.createAnd(LB_chars, mPB.createNot(mCRLF));  // count the CR, but not CRLF
    186     PabloAST * lb = UNICODE_LINE_BREAK ? UnicodeLineBreak : LF;
    187     PabloAST * unterminatedLineAtEOF = mPB.createAtEOF(mPB.createAdvance(mPB.createNot(LB_chars), 1));
    188     mLineBreak = mPB.createOr(lb, unterminatedLineAtEOF);
    189     mAny = mPB.createNot(lb, "any");
     165    mLineBreak = mPB.createExtract(linebreak, mPB.getInteger(0));
     166    mAny = mPB.createNot(mLineBreak, "any");
    190167}
    191168
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r5308 r5357  
    1010#include <re/re_seq.h>  // for Seq
    1111#include <boost/container/flat_map.hpp>
     12#include <pablo/builder.hpp>
    1213#include <vector>       // for vector<>::iterator
    1314namespace cc { class CC_Compiler; }
     
    6061
    6162    RE_Compiler(pablo::PabloKernel * kernel, cc::CC_Compiler & ccCompiler, bool CountOnly = false);
    62     void initializeRequiredStreams(const unsigned encodingBits);
     63    void initializeRequiredStreams(const unsigned encodingBits, pablo::Var * linebreak);
    6364    void compileUnicodeNames(RE *& re);
    6465    void finalizeMatchResult(MarkerType match_result, bool InvertMatches = false);
     
    9192    };
    9293
    93     void initializeRequiredStreams_utf8();
    94     void initializeRequiredStreams_utf16();
     94    void initializeRequiredStreams_utf8(pablo::Var * linebreak);
     95    void initializeRequiredStreams_utf16(pablo::Var * linebreak);
    9596    MarkerType compile(RE * re, pablo::PabloBuilder & cg);
    9697
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

    r5310 r5357  
    8787void re2pablo_compiler(PabloKernel * kernel, RE * re_ast, const bool CountOnly) {
    8888    Var * const basis = kernel->getInputStreamVar("basis");
     89    Var * const linebreak = kernel->getInputStreamVar("linebreak");
    8990    cc::CC_Compiler cc_compiler(kernel, basis);
    9091    re::RE_Compiler re_compiler(kernel, cc_compiler, CountOnly);
    91     re_compiler.initializeRequiredStreams(basis->getType()->getArrayNumElements());
     92    re_compiler.initializeRequiredStreams(basis->getType()->getArrayNumElements(), linebreak);
    9293    re_compiler.compileUnicodeNames(re_ast);
    9394    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast), AlgorithmOptions.isSet(InvertMatches));
Note: See TracChangeset for help on using the changeset viewer.