Ignore:
Timestamp:
Jul 11, 2017, 1:57:37 PM (21 months ago)
Author:
cameron
Message:

Factor out required streams into a separate kernel

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5558 r5561  
    4040
    4141namespace re {
    42 
    // Selects the encoding-specific initializer for the required streams:
    // encodingBits == 8 calls the UTF-8 variant, encodingBits == 16 the UTF-16 variant.
    // NOTE(review): any other encodingBits value matches neither branch, so nothing
    // is initialized and no error is reported -- confirm callers only pass 8 or 16.
    43 void RE_Compiler::initializeRequiredStreams(const unsigned encodingBits, Var * linebreak) {
    44     if (encodingBits == 8) {
    45         RE_Compiler::initializeRequiredStreams_utf8(linebreak);
    46     } else if (encodingBits == 16) {
    47         RE_Compiler::initializeRequiredStreams_utf16(linebreak);
    48     }
    49 }
    50 
    // Builds the UTF-16 versions of the member streams mCRLF, mNonFinal, mFinal,
    // mInitial, mLineBreak and mAny. Character classes are compiled through
    // mCCCompiler and combined with mPB; the line break stream is taken from
    // element 0 of `linebreak`.
    51 void RE_Compiler::initializeRequiredStreams_utf16(Var * linebreak) {
    52     PabloAST * LF = mCCCompiler.compileCC("LF", makeCC(0x000A), mPB);
    53     PabloAST * CR = mCCCompiler.compileCC("CR", makeCC(0x000D), mPB);
           // mCRLF = (CR advanced by one position) AND LF.
    54     PabloAST * cr1 = mPB.createAdvance(CR, 1, "cr1");
    55     mCRLF = mPB.createAnd(cr1, LF, "crlf");
    56 
    57     PabloAST * hi_surrogate = mCCCompiler.compileCC(makeCC(0xD800, 0xDBFF));
    58     //PabloAST * lo_surrogate = mCCCompiler.compileCC(makeCC(0xDC00, 0xDFFF));
           // NOTE(review): hi_surrogate above ends at 0xDBFF, but the two classes below
           // end at 0xDB00 and 0xDF00. Their trailing comments describe high-byte ranges
           // ([\xD8-\xDB], [\xDC-\xDF]). If makeCC takes full 16-bit code units here,
           // 0xDB01-0xDBFF and 0xDF01-0xDFFF are left out -- confirm whether intended.
    59     PabloAST * u16hi_hi_surrogate = mCCCompiler.compileCC(makeCC(0xD800, 0xDB00));    //u16hi_hi_surrogate = [\xD8-\xDB]
    60     PabloAST * u16hi_lo_surrogate = mCCCompiler.compileCC(makeCC(0xDC00, 0xDF00));    //u16hi_lo_surrogate = [\xDC-\xDF]
    61 
           // u16invalid = (u16hi_hi_surrogate advanced by one) XOR u16hi_lo_surrogate,
           // i.e. set wherever exactly one of the two streams is set.
    62     PabloAST * invalidTemp = mPB.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
    63     PabloAST * u16invalid = mPB.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
    64     // errors.Unicode=pablo.Advance(u16hi_hi_surrogate) ^ u16hi_lo_surrogate
    65     PabloAST * u16valid = mPB.createNot(u16invalid, "u16valid");
    66 
           // u16single = code units in 0x0000-0xD7FF or 0xE000-0xFFFF that are not flagged
           // invalid. A second Not(u16invalid) is created here rather than reusing u16valid.
    67     PabloAST * u16single_temp = mPB.createOr(mCCCompiler.compileCC(makeCC(0x0000, 0xD7FF)), mCCCompiler.compileCC(makeCC(0xE000, 0xFFFF)));
    68     PabloAST * u16single = mPB.createAnd(u16single_temp, mPB.createNot(u16invalid));
    69    
           // mNonFinal = hi_surrogate AND u16valid; mFinal = NOT (mNonFinal OR u16invalid).
           // mInitial ORs in hi_surrogate without masking by u16valid (unlike mNonFinal).
    70     mNonFinal = mPB.createAnd(hi_surrogate, u16valid, "nonfinal");
    71     mFinal = mPB.createNot(mPB.createOr(mNonFinal, u16invalid), "final");
    72     mInitial = mPB.createOr(u16single, hi_surrogate, "initial");
    73    
           // mAny is the complement of the line break stream.
    74     mLineBreak = mPB.createExtract(linebreak, mPB.getInteger(0));
    75     mAny = mPB.createNot(mLineBreak, "any");
    76 }
    77 
    // Builds the UTF-8 versions of the member streams mCRLF, mNonFinal, mFinal,
    // mInitial, mLineBreak and mAny. The CRLF and multi-byte computations are
    // placed in nested PabloBuilder scopes attached through createIf, and their
    // results are written to Vars created in the enclosing scope with an
    // all-zero initial value. The line break stream is element 0 of `linebreak`.
    78 void RE_Compiler::initializeRequiredStreams_utf8(Var * linebreak) {
    79     PabloAST * LF = mCCCompiler.compileCC("LF", makeCC(0x0A), mPB);
    80     PabloAST * CR = mCCCompiler.compileCC(makeCC(0x0D));
    81 
           // crlf starts as zeroes; inside the If guarded by CR it is assigned
           // (CR advanced by one position) AND LF.
    82     Zeroes * const zero = mPB.createZeroes();
    83     Var * crlf = mPB.createVar("crlf", zero);
    84     PabloBuilder crb = PabloBuilder::Create(mPB);
    85     PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
    86     crb.createAssign(crlf, crb.createAnd(cr1, LF));
    87     mPB.createIf(CR, crb);
    88 
    89     mCRLF = crlf;
    90 
           // Result Vars, declared in mPB and assigned inside the `it` scope below.
    91     Var * u8invalid = mPB.createVar("u8invalid", zero);
    92     Var * valid_pfx = mPB.createVar("valid_pfx", zero);
    93     Var * nonFinal = mPB.createVar("nonfinal", zero);
    94 
           // `it` is the body of an If guarded by u8pfx (bytes 0xC0-0xFF). createIf is
           // called here, before the statements below are added to `it`.
    95     PabloAST * u8pfx = mCCCompiler.compileCC(makeCC(0xC0, 0xFF));
    96     PabloBuilder it = PabloBuilder::Create(mPB);
    97     mPB.createIf(u8pfx, it);
    98 
           // mNonFinal refers to the nonFinal Var; its value is assigned near the end of `it`.
    99     mNonFinal = nonFinal;
    100 
            // Prefix classes by sequence length (C2-DF, E0-EF, F0-F4) and the suffix class 80-BF.
    101     PabloAST * u8pfx2 = mCCCompiler.compileCC(makeCC(0xC2, 0xDF), it);
    102     PabloAST * u8pfx3 = mCCCompiler.compileCC(makeCC(0xE0, 0xEF), it);
    103     PabloAST * u8pfx4 = mCCCompiler.compileCC(makeCC(0xF0, 0xF4), it);
    104     PabloAST * u8suffix = mCCCompiler.compileCC("u8suffix", makeCC(0x80, 0xBF), it);
    105 
    106     //
    107     // Two-byte sequences
            // u8scope22 = u8pfx2 advanced by one, computed inside an If guarded by u8pfx2.
    108     Var * u8scope22 = it.createVar("u8scope22", zero);
    109     PabloBuilder it2 = PabloBuilder::Create(it);
    110     it2.createAssign(u8scope22, it2.createAdvance(u8pfx2, 1));
    111     it.createIf(u8pfx2, it2);
    112 
    113     //
    114     // Three-byte sequences
            // u8scope32 / u8scope33 = u8pfx3 advanced by 1 / 2; u8scope3X is their union.
            // EX_invalid flags 0x80-0x9F right after E0 and 0xA0-0xBF right after ED
            // (second bytes the Unicode well-formed UTF-8 table disallows after those leads).
    115     Var * u8scope32 = it.createVar("u8scope32", zero);
    116     Var * u8scope3X = it.createVar("u8scope3X", zero);
    117     Var * EX_invalid = it.createVar("EX_invalid", zero);
    118     PabloBuilder it3 = PabloBuilder::Create(it);
    119     it.createIf(u8pfx3, it3);
    120     it3.createAssign(u8scope32, it3.createAdvance(u8pfx3, 1));
    121     PabloAST * u8scope33 = it3.createAdvance(u8pfx3, 2);
    122     it3.createAssign(u8scope3X, it3.createOr(u8scope32, u8scope33));
    123     PabloAST * E0_invalid = it3.createAnd(it3.createAdvance(mCCCompiler.compileCC(makeCC(0xE0), it3), 1), mCCCompiler.compileCC(makeCC(0x80, 0x9F), it3));
    124     PabloAST * ED_invalid = it3.createAnd(it3.createAdvance(mCCCompiler.compileCC(makeCC(0xED), it3), 1), mCCCompiler.compileCC(makeCC(0xA0, 0xBF), it3));
    125     it3.createAssign(EX_invalid, it3.createOr(E0_invalid, ED_invalid));
    126 
    127     //
    128     // Four-byte sequences
            // u8scope42/43/44 = u8pfx4 advanced by 1/2/3 (each built from the previous one).
            // u8scope4nonfinal covers the 2nd and 3rd positions; u8scope4X adds the 4th.
            // FX_invalid flags 0x80-0x8F right after F0 and 0x90-0xBF right after F4.
    129     Var * u8scope4nonfinal = it.createVar("u8scope4nonfinal", zero);
    130     Var * u8scope4X = it.createVar("u8scope4X", zero);
    131     Var * FX_invalid = it.createVar("FX_invalid", zero);
    132     PabloBuilder it4 = PabloBuilder::Create(it);
    133     it.createIf(u8pfx4, it4);
    134     PabloAST * u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
    135     PabloAST * u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
    136     PabloAST * u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
    137     it4.createAssign(u8scope4nonfinal, it4.createOr(u8scope42, u8scope43));
    138     it4.createAssign(u8scope4X, it4.createOr(u8scope4nonfinal, u8scope44));
    139     PabloAST * F0_invalid = it4.createAnd(it4.createAdvance(mCCCompiler.compileCC(makeCC(0xF0), it4), 1), mCCCompiler.compileCC(makeCC(0x80, 0x8F), it4));
    140     PabloAST * F4_invalid = it4.createAnd(it4.createAdvance(mCCCompiler.compileCC(makeCC(0xF4), it4), 1), mCCCompiler.compileCC(makeCC(0x90, 0xBF), it4));
    141     it4.createAssign(FX_invalid, it4.createOr(F0_invalid, F4_invalid));
    142 
    143     //
    144     // Invalid cases
    145     PabloAST * anyscope = it.createOr(u8scope22, it.createOr(u8scope3X, u8scope4X));
    146     PabloAST * legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
    147     //  Any scope that does not have a suffix byte, and any suffix byte that is not in
    148     //  a scope is a mismatch, i.e., invalid UTF-8.
    149     PabloAST * mismatch = it.createXor(anyscope, u8suffix);
    150     //
            // pfx_invalid = u8pfx XOR legalpfx, i.e. bytes in 0xC0-0xFF that are not one of
            // the three legal prefix classes (C0, C1, F5-FF given the ranges above).
    151     PabloAST * EF_invalid = it.createOr(EX_invalid, FX_invalid);
    152     PabloAST * pfx_invalid = it.createXor(u8pfx, legalpfx);
    153     it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
    154     PabloAST * u8valid = it.createNot(u8invalid, "u8valid");
    155     //
    156     //
    157 
            // valid_pfx = u8pfx AND u8valid.
            // mNonFinal = (u8pfx OR u8scope32 OR u8scope4nonfinal) AND u8valid.
    158     it.createAssign(valid_pfx, it.createAnd(u8pfx, u8valid));
    159     it.createAssign(mNonFinal, it.createAnd(it.createOr(it.createOr(u8pfx, u8scope32), u8scope4nonfinal), u8valid));
    160 
    161 
            // Back in the outer scope: u8single = bytes 0x00-0x7F not flagged invalid;
            // mInitial = u8single OR valid_pfx; mFinal = NOT (mNonFinal OR u8invalid);
            // mAny is the complement of the line break stream.
    162     PabloAST * u8single = mPB.createAnd(mCCCompiler.compileCC(makeCC(0x00, 0x7F)), mPB.createNot(u8invalid));
    163     mInitial = mPB.createOr(u8single, valid_pfx, "initial");
    164     mFinal = mPB.createNot(mPB.createOr(mNonFinal, u8invalid), "final");
    165     mLineBreak = mPB.createExtract(linebreak, mPB.getInteger(0));
    166     mAny = mPB.createNot(mLineBreak, "any");
    167 }
    16842
    16943
     
    647521, mPB(ccCompiler.getBuilder())
    648522, mCompiledName(&mBaseMap) {
    649 
     523    Var * const linebreak = mKernel->getInputStreamVar("linebreak");
     524    mLineBreak = mPB.createExtract(linebreak, mPB.getInteger(0));
     525    mAny = mPB.createNot(mLineBreak, "any");
     526    Var * const required = mKernel->getInputStreamVar("required");
     527    mInitial = mPB.createExtract(required, mPB.getInteger(0));
     528    mNonFinal = mPB.createExtract(required, mPB.getInteger(1));
     529    mFinal = mPB.createExtract(required, mPB.getInteger(2));
     530    mCRLF = mPB.createExtract(required, mPB.getInteger(3));
    650531}
    651532
Note: See TracChangeset for help on using the changeset viewer.