Ignore:
Timestamp:
Mar 7, 2017, 2:09:25 PM (2 years ago)
Author:
xuedongx
Message:

Delete the linebreak computation in icgrep's RE_Compiler; the line-break stream is now supplied through a new `linebreak` parameter.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5333 r5357  
    4141namespace re {
    4242
    43 void RE_Compiler::initializeRequiredStreams(const unsigned encodingBits) {
     43void RE_Compiler::initializeRequiredStreams(const unsigned encodingBits, Var * linebreak) {
    4444    if (encodingBits == 8) {
    45         RE_Compiler::initializeRequiredStreams_utf8();
     45        RE_Compiler::initializeRequiredStreams_utf8(linebreak);
    4646    } else if (encodingBits == 16) {
    47         RE_Compiler::initializeRequiredStreams_utf16();
    48     }
    49 }
    50 
    51 void RE_Compiler::initializeRequiredStreams_utf16() {
     47        RE_Compiler::initializeRequiredStreams_utf16(linebreak);
     48    }
     49}
     50
     51void RE_Compiler::initializeRequiredStreams_utf16(Var * linebreak) {
    5252    PabloAST * LF = mCCCompiler.compileCC("LF", makeCC(0x000A), mPB);
    5353    PabloAST * CR = mCCCompiler.compileCC("CR", makeCC(0x000D), mPB);
    54     PabloAST * LF_VT_FF_CR = mCCCompiler.compileCC(makeCC(0x000A, 0x000D));
    55     PabloAST * NEL = mCCCompiler.compileCC("NEL", makeCC(0x0085), mPB);
    56     PabloAST * LS_PS = mCCCompiler.compileCC("LS_PS", makeCC(0x2028, 0x2029), mPB);
    57     PabloAST * NEL_LS_PS = mPB.createOr(NEL, LS_PS, "NEL_LS_PS");
    58 
    5954    PabloAST * cr1 = mPB.createAdvance(CR, 1, "cr1");
    6055    mCRLF = mPB.createAnd(cr1, LF, "crlf");
     
    7772    mInitial = mPB.createOr(u16single, hi_surrogate, "initial");
    7873   
    79     PabloAST * LB_chars = mPB.createOr(LF_VT_FF_CR, NEL_LS_PS);
    80     PabloAST * UnicodeLineBreak = mPB.createAnd(LB_chars, mPB.createNot(mCRLF));  // count the CR, but not CRLF
    81     PabloAST * lb = UNICODE_LINE_BREAK ? UnicodeLineBreak : LF;
    82     PabloAST * unterminatedLineAtEOF = mPB.createAtEOF(mPB.createAdvance(mPB.createNot(LB_chars), 1));
    83     mLineBreak = mPB.createOr(lb, unterminatedLineAtEOF);
    84     mAny = mPB.createNot(lb, "any");
    85 }
    86 void RE_Compiler::initializeRequiredStreams_utf8() {
     74    mLineBreak = mPB.createExtract(linebreak, mPB.getInteger(0));
     75    mAny = mPB.createNot(mLineBreak, "any");
     76}
     77
     78void RE_Compiler::initializeRequiredStreams_utf8(Var * linebreak) {
    8779    PabloAST * LF = mCCCompiler.compileCC("LF", makeCC(0x0A), mPB);
    8880    PabloAST * CR = mCCCompiler.compileCC(makeCC(0x0D));
    89     PabloAST * LF_VT_FF_CR = mCCCompiler.compileCC(makeCC(0x0A, 0x0D));
    9081
    9182    Zeroes * const zero = mPB.createZeroes();
     
    10192    Var * valid_pfx = mPB.createVar("valid_pfx", zero);
    10293    Var * nonFinal = mPB.createVar("nonfinal", zero);
    103     Var * NEL_LS_PS = mPB.createVar("NEL_LS_PS", zero);
    10494
    10595    PabloAST * u8pfx = mCCCompiler.compileCC(makeCC(0xC0, 0xFF));
     
    117107    // Two-byte sequences
    118108    Var * u8scope22 = it.createVar("u8scope22", zero);
    119     Var * NEL = it.createVar("NEL", zero);
    120109    PabloBuilder it2 = PabloBuilder::Create(it);
    121110    it2.createAssign(u8scope22, it2.createAdvance(u8pfx2, 1));
    122     it2.createAssign(NEL, it2.createAnd(it2.createAdvance(mCCCompiler.compileCC(makeCC(0xC2), it2), 1), mCCCompiler.compileCC(makeCC(0x85), it2)));
    123111    it.createIf(u8pfx2, it2);
    124112
     
    127115    Var * u8scope32 = it.createVar("u8scope32", zero);
    128116    Var * u8scope3X = it.createVar("u8scope3X", zero);
    129     Var * LS_PS = it.createVar("LS_PS", zero);
    130117    Var * EX_invalid = it.createVar("EX_invalid", zero);
    131 
    132118    PabloBuilder it3 = PabloBuilder::Create(it);
    133119    it.createIf(u8pfx3, it3);
    134 
    135120    it3.createAssign(u8scope32, it3.createAdvance(u8pfx3, 1));
    136121    PabloAST * u8scope33 = it3.createAdvance(u8pfx3, 2);
    137122    it3.createAssign(u8scope3X, it3.createOr(u8scope32, u8scope33));
    138     PabloAST * E2_80 = it3.createAnd(it3.createAdvance(mCCCompiler.compileCC(makeCC(0xE2), it3), 1), mCCCompiler.compileCC(makeCC(0x80), it3));
    139     it3.createAssign(LS_PS, it3.createAnd(it3.createAdvance(E2_80, 1), mCCCompiler.compileCC(makeCC(0xA8,0xA9), it3)));
    140123    PabloAST * E0_invalid = it3.createAnd(it3.createAdvance(mCCCompiler.compileCC(makeCC(0xE0), it3), 1), mCCCompiler.compileCC(makeCC(0x80, 0x9F), it3));
    141124    PabloAST * ED_invalid = it3.createAnd(it3.createAdvance(mCCCompiler.compileCC(makeCC(0xED), it3), 1), mCCCompiler.compileCC(makeCC(0xA0, 0xBF), it3));
    142125    it3.createAssign(EX_invalid, it3.createOr(E0_invalid, ED_invalid));
    143 
    144     it.createAssign(NEL_LS_PS, it.createOr(NEL, LS_PS));
    145126
    146127    //
     
    179160
    180161
    181     PabloAST * LB_chars = mPB.createOr(LF_VT_FF_CR, NEL_LS_PS);
    182162    PabloAST * u8single = mPB.createAnd(mCCCompiler.compileCC(makeCC(0x00, 0x7F)), mPB.createNot(u8invalid));
    183163    mInitial = mPB.createOr(u8single, valid_pfx, "initial");
    184164    mFinal = mPB.createNot(mPB.createOr(mNonFinal, u8invalid), "final");
    185     PabloAST * UnicodeLineBreak = mPB.createAnd(LB_chars, mPB.createNot(mCRLF));  // count the CR, but not CRLF
    186     PabloAST * lb = UNICODE_LINE_BREAK ? UnicodeLineBreak : LF;
    187     PabloAST * unterminatedLineAtEOF = mPB.createAtEOF(mPB.createAdvance(mPB.createNot(LB_chars), 1));
    188     mLineBreak = mPB.createOr(lb, unterminatedLineAtEOF);
    189     mAny = mPB.createNot(lb, "any");
     165    mLineBreak = mPB.createExtract(linebreak, mPB.getInteger(0));
     166    mAny = mPB.createNot(mLineBreak, "any");
    190167}
    191168
Note: See TracChangeset for help on using the changeset viewer.